Skip to content

Instantly share code, notes, and snippets.

@DreamOfTranscendence
Last active September 19, 2025 02:52
Show Gist options
  • Select an option

  • Save DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe to your computer and use it in GitHub Desktop.

Select an option

Save DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe to your computer and use it in GitHub Desktop.
C++ syntax highlighter in JavaScript: takes the contents of a C++ source file as a string and outputs HTML-formatted highlighting
//Important: original code author:
//grabbed from: https://github.com/AdrS/CppToHTML
//GNU license or something, IDK
//USAGE: Select and upload file from the file input element if you can find it,
//run this file on example.com or a blank webpage
// run: var htmlString=cpp_syntax_hi.htmlPrettyPrint(cpp_syntax_hi.file_e.files[0].textC), od2=document.createElement("div"); od2.innerHTML=htmlString; document.body.appendChild(od2);
//I might need this to make: https://gist.github.com/DreamOfTranscendence/0dbb2267f4ebd19441869fe793f46cab
/*
#include <string>
#include <iostream>
#include <cstdio>
*/
(function cpp_syntax_hi(){ //context isolator wrapper function?
// Token categories (stand-in for the C++ `enum TokenType`). The bare names are
// used throughout the scanner; TokenType groups the same values in one object.
var PREPROCESSOR = 0;
var COMMENT = 1;
var KEYWORD = 2;
var IDENTIFIER = 3;
var CONSTANT = 4;
var STRING = 5;
var SYMBOLS = 6;
var S_SYMBOL = 7;
var TokenType = {
  PREPROCESSOR: PREPROCESSOR,
  COMMENT: COMMENT,
  KEYWORD: KEYWORD,
  IDENTIFIER: IDENTIFIER,
  CONSTANT: CONSTANT,
  STRING: STRING,
  SYMBOLS: SYMBOLS,
  S_SYMBOL: S_SYMBOL
};
// Publish the enum on the global object so code outside this wrapper can read it.
self.enum_TokenType = TokenType;

// Shared scanner state, reset by htmlPrettyPrint at the start of every run.
var pos = 0;                 // cursor into the source string; advanced by skipWhiteSpace/getNextToken
var n_blockbrak = [];        // stack of currently open brackets; each item is [bracketChar, symId], e.g. ["{", "sbbp_209"]
var s_blockbrak = "{}[]()";  // bracket characters: even index = opener, following odd index = its closer
var saPos = 0;               // running counter used to build the bracket ids
var _sym_ids;                // filled by htmlPrettyPrint in array mode (ofmt == 2): symId -> [openIndex, closeIndex]
/**
 * Returns the display name of a token type.
 *
 * @param {number} t - One of the token type constants (PREPROCESSOR ... S_SYMBOL).
 * @returns {string} The constant's name, or "bs" when `t` is not a known token type.
 */
function tokenTypeToText(/*TokenType*/ t) {
  // A Map keeps the exact-match behaviour of a switch: the string "0" does not match the number 0.
  var namesByType = new Map([
    [PREPROCESSOR, "PREPROCESSOR"],
    [COMMENT, "COMMENT"],
    [STRING, "STRING"],
    [CONSTANT, "CONSTANT"],
    [SYMBOLS, "SYMBOLS"],
    [KEYWORD, "KEYWORD"],
    [IDENTIFIER, "IDENTIFIER"],
    [S_SYMBOL, "S_SYMBOL"]
  ]);
  if (namesByType.has(t)) {
    return namesByType.get(t);
  }
  return "bs";
};
/**
 * Token(s, t): factory standing in for the C++ `struct Token`.
 *
 * @param {*} s - Token text; anything that is not already a string is converted with String().
 * @param {number} t - Token type constant.
 * @returns {{type: number, contents: string}} A fresh token object whose prototype is Token.prototype.
 *   Works with or without `new`, because the factory returns its own object.
 */
var Token;
(function defineTokenStruct() {
  // Shared prototype; it also supplies the default fields copied onto every token.
  var tokenProto = { type: undefined, contents: undefined };
  Token = function Token(s, t) {
    // Link the prototype first, then copy its enumerable fields so each token owns them.
    var tok = Object.assign(Object.create(tokenProto), tokenProto);
    tok.type = t;
    tok.contents = typeof s == "string" ? s : String(s);
    return tok;
  };
  tokenProto.constructor = Token;
  Token.prototype = tokenProto;
})();
/**
 * Recognises a block bracket ("{", "}", "[", "]", "(", ")") and builds its token.
 *
 * Openers are pushed on the shared n_blockbrak stack with a fresh id ("sbbp_N").
 * A closer that matches the opener on top of the stack pops it and reuses that id,
 * so both halves of a pair carry the same sym_id. A closer with no matching opener
 * on top of the stack (including when the stack is empty) gets a "lonely_sbbp_N" id
 * and leaves the stack untouched.
 *
 * @param {string} c - Single character to test.
 * @returns {Object|false} A Token of type S_SYMBOL with extra `sym_id` (string) and
 *   `isEnd` (boolean) fields, or false when `c` is not a block bracket.
 */
function isS_Symbol(c) {
  // Only a single character can be a bracket; indexOf("") would otherwise report index 0.
  var idx = (typeof c == "string" && c.length == 1) ? s_blockbrak.indexOf(c) : -1;
  if (idx == -1) {
    return false;
  }
  var isEnd = (idx & 1) == 1; // s_blockbrak lists opener, closer, opener, closer, ...
  var rid;
  if (isEnd) {
    var top = n_blockbrak[n_blockbrak.length - 1];
    // `top` is undefined when no bracket is open; treat that closer as lonely instead of throwing.
    if (top && s_blockbrak.indexOf(top[0]) + 1 == idx) {
      n_blockbrak.pop();
      rid = top[1];
    } else {
      rid = "lonely_sbbp_" + saPos;
    }
  } else {
    rid = "sbbp_" + saPos;
    n_blockbrak.push([c, rid]);
  }
  var tok = Token(c, S_SYMBOL);
  tok.sym_id = rid;
  tok.isEnd = isEnd;
  saPos++; // every bracket token consumes one id number, matched or not
  return tok;
};
/**
 * Tells whether `c` is one of the whitespace characters the scanner skips:
 * space, tab, line feed, carriage return or form feed ('\f' is U+000C).
 *
 * @param {string} c - Character to test (may be undefined past the end of the input).
 * @returns {boolean}
 */
function isWhiteSpace(c) {
  var blanks = [' ', '\t', '\n', '\r', '\f'];
  for (var i = 0; i < blanks.length; i++) {
    if (c == blanks[i]) {
      return true;
    }
  }
  return false;
};
/**
 * Tells whether `c` lies in the range '0'..'9'.
 *
 * @param {string} c - Character to test (undefined yields false).
 * @returns {boolean}
 */
function isDecimalDigit(c) {
  var notBelowZero = '0' <= c;
  var notAboveNine = c <= '9';
  return notBelowZero && notAboveNine;
};
/**
 * Tells whether `c` lies in the range '0'..'7'.
 *
 * @param {string} c - Character to test (undefined yields false).
 * @returns {boolean}
 */
function isOctalDigit(c) {
  var notBelowZero = '0' <= c;
  var belowEight = c < '8';
  return notBelowZero && belowEight;
};
/**
 * Tells whether `c` is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
 *
 * @param {string} c - Character to test (undefined yields false).
 * @returns {boolean}
 */
function isHexDigit(c) {
  var ranges = [['0', '9'], ['A', 'F'], ['a', 'f']];
  for (var i = 0; i < ranges.length; i++) {
    if (c >= ranges[i][0] && c <= ranges[i][1]) {
      return true;
    }
  }
  return false;
};
/**
 * Advances the shared cursor `pos` past any run of whitespace in `s`.
 * Stops at the first non-whitespace character or at the end of the string.
 *
 * @param {string} s - Source text being scanned.
 * @returns {undefined} The result is the updated value of `pos`.
 */
function skipWhiteSpace(/*const std::string&*/ s) {
  var end = s.length;
  for (; pos < end; pos++) {
    if (!isWhiteSpace(s[pos])) {
      break;
    }
  }
};
/**
 * Tells whether `c` is one of the punctuation/operator characters
 * ~ ! % ^ & * ( ) - + = { [ } ] | : ; , < . > / ?
 *
 * @param {string} c - Character to test (undefined yields false).
 * @returns {boolean}
 */
function isSymbol(/*char*/ c) {
  var symbolChars = "~!%^&*()-+={[}]|:;,<.>/?".split("");
  return symbolChars.some(function (candidate) {
    return c == candidate;
  });
};
/**
 * Tells whether `c` may appear in an identifier: 'A'..'Z', 'a'..'z', '0'..'9' or '_'.
 * The caller is responsible for rejecting a leading digit.
 *
 * @param {string} c - Character to test (undefined yields false).
 * @returns {boolean}
 */
function isValidNameCharacter(c) {
  var ranges = [['A', 'Z'], ['a', 'z'], ['0', '9']];
  for (var i = 0; i < ranges.length; i++) {
    if (c >= ranges[i][0] && c <= ranges[i][1]) {
      return true;
    }
  }
  return c == '_';
};
// C++ keywords (plus the contextual `final`/`override`), kept in code-unit order.
// The previous table listed "constexpr" before "const_cast"; since '_' sorts before
// 'e', that broke the binary search and "const_cast" was never recognised.
// The C++20 additions (char8_t, co_await, co_return, co_yield, concept, consteval,
// constinit, requires) are included as well.
const /*char**/ keywords_ = [ "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "char8_t", "class", "co_await", "co_return", "co_yield", "compl", "concept", "const", "const_cast", "consteval", "constexpr", "constinit", "continue", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "final", "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not", "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private", "protected", "public", "register", "reinterpret_cast", "requires", "return", "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq" ];
// Set lookup does not depend on the table's order or on a hard-coded length.
const keywordSet_ = new Set(keywords_);
/**
 * Tells whether `s` is a C++ keyword.
 *
 * @param {string} s - Candidate word; non-strings are converted with String().
 * @returns {boolean} true when `s` appears in keywords_.
 */
function isKeyword(/*std::string&*/ s) {
  return keywordSet_.has(String(s));
};
/*
TODO: add support for multi-line preprocessor directives
TODO: add support for decimal constants, e.g. 1.09E-5
*/
/**
 * Scans one token of C++ source text starting at the shared cursor `pos` and
 * leaves `pos` just past that token. The caller is expected to have skipped
 * leading whitespace.
 *
 * Recognised, in this order: comments (// and block comments), the "/" operator,
 * preprocessor lines (up to end of line or the start of a comment), string and
 * character literals, integer constants (decimal, 0x... hex, 0o... octal),
 * block brackets (via isS_Symbol), operator runs, keywords and identifiers.
 * Anything else is wrapped in an "analysis fail?" comment token.
 *
 * @param {string} s - Complete source text.
 * @returns {Object} A Token; bracket tokens additionally carry sym_id / isEnd.
 *   When `pos` is already at or past the end, a COMMENT token "//end" is returned.
 */
function/*Token*/ getNextToken( s/*, unsigned int& pos */) {
  var start = pos;
  var len = s.length;
  if (pos >= len) {
    return Token("//end", COMMENT);
  }
  var c = s[pos];
  var next = s[pos + 1]; // undefined at end of input, which compares unequal to everything below
  var end;

  if (c == '/') {
    if (next == '/') { // single line comment: everything up to (not including) the newline
      end = s.indexOf('\n', pos + 2);
      pos = end == -1 ? len : end;
      return Token(s.substring(start, pos), COMMENT);
    }
    if (next == '*') { // block comment: search for the terminator AFTER the opener, so "/*/" is not a complete comment
      end = s.indexOf('*/', pos + 2);
      pos = end == -1 ? len : end + 2;
      return Token(s.substring(start, pos), COMMENT);
    }
    pos++; // plain division operator
    return Token("/", SYMBOLS);
  }

  if (c == '#') { // preprocessor directive: up to end of line, end of file, or start of a comment
    while (pos < len && s[pos] != '\n' && !(s[pos] == '/' && (s[pos + 1] == '*' || s[pos + 1] == '/'))) { pos++; }
    return Token(s.substring(start, pos), PREPROCESSOR);
  }

  if (c == '"' || c == "'") { // string or character literal
    pos++; // step over the opening quote
    while (pos < len && s[pos] != c) {
      // A backslash escapes exactly one following character, so "\\" is an
      // escaped backslash and the quote after it still closes the literal.
      pos += s[pos] == '\\' ? 2 : 1;
    }
    pos = Math.min(pos + 1, len); // include the closing quote; clamp for unterminated literals
    return Token(s.substring(start, pos), STRING);
  }

  if (isDecimalDigit(c)) { // integer constant
    var prefix = (c == '0' && pos + 2 < len) ? next : "";
    if ((prefix == 'o' || prefix == 'O') && isOctalDigit(s[pos + 2])) {
      pos += 3;
      while (pos < len && isOctalDigit(s[pos])) { pos++; }
    } else if ((prefix == 'x' || prefix == 'X') && isHexDigit(s[pos + 2])) {
      pos += 3;
      while (pos < len && isHexDigit(s[pos])) { pos++; }
    } else {
      while (pos < len && isDecimalDigit(s[pos])) { pos++; }
    }
    return Token(s.substring(start, pos), CONSTANT);
  }

  var bracketToken = isS_Symbol(c);
  if (bracketToken) {
    pos++;
    return bracketToken;
  }

  if (isSymbol(c)) { // run of operator characters
    pos++;
    // Stop before a bracket so it becomes its own S_SYMBOL token (otherwise ";}" or "=("
    // would hide the bracket from the matching logic), and before the start of a comment.
    while (pos < len && isSymbol(s[pos]) && s_blockbrak.indexOf(s[pos]) == -1 &&
           !(s[pos] == '/' && (s[pos + 1] == '/' || s[pos + 1] == '*'))) { pos++; }
    return Token(s.substring(start, pos), SYMBOLS);
  }

  if (isValidNameCharacter(c)) { // keyword or identifier (digits were handled above)
    pos++;
    while (pos < len && isValidNameCharacter(s[pos])) { pos++; }
    var word = s.substring(start, pos);
    return Token(word, isKeyword(word) ? KEYWORD : IDENTIFIER);
  }

  // Unrecognised text: swallow it up to the next character some branch above can handle.
  while (pos < len && s[pos] != '/' && s[pos] != '"' && !isValidNameCharacter(s[pos]) && !isSymbol(s[pos])) { pos++; }
  return Token("/* analysis fail? : \n" + s.substring(start, pos).replace(/\/\*/g, "//END original multiline comment*/ /*") + "*/", COMMENT);
};
/**
 * Escapes the characters that would otherwise be parsed as markup when source
 * text is placed inside HTML ("&", "<" and ">").
 *
 * @param {*} text - Value to escape; converted with String() first.
 * @returns {string}
 */
function _escapeHtml(text) {
  return String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}
/**
 * Tokenises C++ source and renders it as highlighted HTML, or returns the token list.
 *
 * Resets the shared scanner state (pos, n_blockbrak, _sym_ids) before scanning.
 *
 * @param {string} s - C++ source text.
 * @param {number|boolean} [ofmt] - Output format:
 *   - omitted / falsy: a `<style>` block followed by `<pre class="CODE">...</pre>`;
 *   - 1: the same, wrapped in a complete HTML document;
 *   - 2: an array of tokens (whitespace runs appear as {type:"ws", contents}); the array
 *     has a `sym_ids` property mapping each bracket id to [openIndex, closeIndex].
 * @returns {string|Array} HTML string, or the token array when ofmt == 2.
 *
 * In the HTML formats every token becomes `<span class="TYPE">text</span>` with the text
 * HTML-escaped. An opening bracket also opens a `<span class="bigCollapseAble">` wrapper
 * that its matching closing bracket closes; wrappers still open at the end of input are
 * closed before `</pre>`, and unmatched closers do not emit a stray `</span>`.
 */
function htmlPrettyPrint(/*const std::string&*/ s, /*bool*/ ofmt) { // ofmt is wholedoc, and set ofmt to 2 for info array
  var asArray = ofmt == 2;
  var ret, start, t, cssClass;
  var openBlocks = 0; // number of bigCollapseAble wrappers currently open
  pos = 0;
  n_blockbrak = [];
  _sym_ids = {};
  if (ofmt == 1) { ret = "<!DOCTYPE html>\n<html>\n<head>\n </head>\n<body>\n"; }
  else if (asArray) { ret = []; }
  else { ret = ""; }
  if (!asArray) ret += "<style>\n pre.CODE { background-color:#000000;}\n span.PREPROCESSOR { color:#FFBBBB; }\n span.COMMENT { color:#33FF33; }\n span.KEYWORD { color:#FF33FF; }\n span.IDENTIFIER { color:#FFFFFF; }\n span.CONSTANT { color:#FF9933; }\n span.STRING { color:#EE8726; }\n span.SYMBOLS { color:#63E3FF; }\n span.S_SYMBOL { color:#5050FF; font-weight:bold; }\n //span.bigCollapseAble::before { content:\"\\/\"; background-color:#DDD; } \n </style>\n<pre class=\"CODE\">\n";
  while (pos < s.length) {
    start = pos;
    skipWhiteSpace(s); // whitespace is copied through verbatim
    if (pos != start) {
      if (asArray) ret.push({"type": "ws", "contents": s.substring(start, pos)});
      else ret += s.substring(start, pos);
    }
    // Trailing whitespace leaves nothing to tokenise; asking for a token here would
    // append getNextToken's "//end" sentinel to the output.
    if (pos >= s.length) break;
    t = getNextToken(s);
    if (asArray) {
      if (t.type == S_SYMBOL) {
        if (!_sym_ids[t.sym_id]) _sym_ids[t.sym_id] = [];
        _sym_ids[t.sym_id][t.isEnd & 1] = ret.length; // slot 0 = opener index, slot 1 = closer index
      }
      ret.push(t);
      continue;
    }
    cssClass = tokenTypeToText(t.type);
    if (cssClass == "bs") continue; // tokenTypeToText's marker for an unknown type: emit nothing
    if (t.type == S_SYMBOL) {
      if (!t.isEnd) {
        ret += "<span class=\"bigCollapseAble\" >";
        openBlocks++;
      }
      ret += ("<span class=\"S_SYMBOL\" name=\"" + t.sym_id + "\">" + _escapeHtml(t.contents) + "</span>");
      // Only a closer that matched an opener ends a wrapper; unmatched ones carry a "lonely_" id.
      if (t.isEnd && openBlocks > 0 && String(t.sym_id).indexOf("lonely_") != 0) {
        ret += "</span>";
        openBlocks--;
      }
    } else {
      ret += ("<span class=\"" + cssClass + "\">" + _escapeHtml(t.contents) + "</span>");
    }
  }
  if (asArray) {
    ret.sym_ids = _sym_ids;
  } else {
    while (openBlocks > 0) { ret += "</span>"; openBlocks--; } // close wrappers of unclosed brackets
    ret += "</pre>";
  }
  if (ofmt == 1) ret += "\n</body>\n</html>";
  return ret; //std::cout << ret;
};
//int main(int argc, const char** argv) {
/*if (argc != 2) {
std::cout << "usage: main src.cpp" << std::endl;
return 0;
}*/ /*
FILE* f = fopen("G:\\cppToHtml\\cppPrettyHtml.cpp", "r");
if (!f)return 0;
fseek(f, 0, SEEK_END);
size_t size = ftell(f);
fseek(f, 0, SEEK_SET);
char* data = new char[size + 1];
fread(data, 1, size, f);
fclose(f);
data[size] = '\0';
htmlPrettyPrint(std::string(data)); //this here is the ONE that does it.
return 0;
} */
"fin / end";
// File picker used to load C++ sources. After a selection, each chosen File object
// gets its text stored in a `textC` property; once every file has been read,
// `fi.ontextready()` is called if the page has assigned a function to it.
var fi=document.createElement("input");
fi.type="file"; fi.multiple=true;
//fi.setAttribute("webkitdirectory","true");
fi.onchange=function(){
  var files=Array.prototype.slice.call(fi.files);
  if(files.length==0) return; // nothing selected: nothing to read, no ready callback
  // Each callback receives its own `file`; a single shared loop variable would have
  // been overwritten before the asynchronous reads completed, so every text would
  // have landed on the same File object.
  var reads=files.map(function(file){
    return file.text().then(function(text){ file.textC=text; });
  });
  Promise.all(reads).then(function(){
    if(typeof fi.ontextready=="function") fi.ontextready();
  },function(err){
    // Report read failures instead of dropping them silently.
    console.error("cpp_syntax_hi: could not read a selected file",err);
  });
  //using .text() because it yields a string directly; .arrayBuffer() would be needed for non-UTF-8 files, followed by a manual decode
};
document.body.appendChild(fi);
// Public entry points: the highlighter function and the file input element.
self.cpp_syntax_hi={"htmlPrettyPrint":htmlPrettyPrint,"file_e":fi};
// Hover highlighting: pointing at a bracket span (class S_SYMBOL) gives every element
// sharing its `name` (the bracket and its partner) a yellow background.
var lasterHi, // name of the bracket pair currently highlighted, if any
changeDisplayBrak=function(name,ONstate){
  var ei=document.getElementsByName(name);
  for(var n=0,L=ei.length;n!=L;n++) ei[n].style.backgroundColor=ONstate?"#FFFF00":"";
};
self.onmouseover=function(e){ //highlight matching brackets/ blocks / parenthesis
  var t=e.target||e.srcElement,cs,name;
  // Turn off the previous highlight as soon as the pointer moves to another element.
  if(lasterHi){ changeDisplayBrak(lasterHi,false); lasterHi=undefined; }
  if(!t||typeof t.getAttribute!="function") return; // target without attributes
  cs=t.getAttribute("class");
  if(!cs||cs.indexOf("S_SYMBOL")==-1) return;
  // Remember the name only for bracket spans, so that unrelated named elements
  // never have their inline background reset on the next mouse move.
  name=t.getAttribute("name");
  if(name){ lasterHi=name; changeDisplayBrak(name,true); }
};
})(); //end of //context isolator wrapper function?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment