Last active
September 19, 2025 05:18
-
-
Save DreamOfTranscendence/0dbb2267f4ebd19441869fe793f46cab to your computer and use it in GitHub Desktop.
Trying to convert some C++ code to JavaScript by automating some repetitive text replacement
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //https://gist.github.com/DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe | |
/**
 * Downloads a JavaScript file with XMLHttpRequest and runs it by appending an
 * inline <script> element (tagged with a data-from-url attribute) to document.body.
 *
 * NOTE(review): the downloaded text is executed as code, so only call this with
 * URLs whose content you trust.
 *
 * @param {string} url - address of the script to fetch with a GET request.
 * @returns {undefined} nothing; the script is injected asynchronously.
 */
var js_req2=function js_require2_xhr(url){
  var xhr=new XMLHttpRequest();
  xhr.open("GET",url);
  xhr.responseType="text";
  xhr.onload=function(){
    // "load" also fires for 404/500 answers; never inject an error page as code.
    // Status 0 with a body is accepted because file:// requests report it on success.
    var ok=(xhr.status>=200&&xhr.status<300)||(xhr.status===0&&!!xhr.responseText);
    if(!ok){
      console.error("js_req2: HTTP "+xhr.status+" while loading "+url);
      return;
    }
    var j=document.createElement("script");
    j.type="text/javascript";
    j.setAttribute("data-from-url",url);
    j.textContent=xhr.responseText; // plain text assignment, no markup parsing involved
    document.body.appendChild(j);
  };
  xhr.onerror=function(){
    console.error("js_req2: network error while loading "+url);
  };
  xhr.send();
};
| js_req2("https://gist.github.com/DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe/raw/eff952f82eab709010618d0ab12f5e89212a74be/cpp_syntax_hi_1.js"); //syntax highlighter for starters | |
// C++ keywords that are also valid JavaScript keywords and can be left untouched.
// keys_kept is an array that doubles as a lookup table: keys_kept["break"]===true.
var keys_kept;
/* //how the list below was found: try to declare every C++ keyword as a JS
   //variable name; the ones that throw are reserved words in JS too.
keys_kept=[];
(function keepers_lol(){
var keywords_ = [ "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break",
"case", "catch", "char", "char16_t", "char32_t", "class", "compl", "const", "constexpr", "const_cast", "continue",
"decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern",
"false", "final", "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable",
"namespace", "new", "noexcept", "not", "not_eq", "nullptr", "operator", "or", "or_eq", "override",
"private", "protected", "public", "register", "reinterpret_cast", "return",
"short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch",
"template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename",
"union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq" ];
for(var i=keywords_.length-1;i>=0;i--) try{ eval("var "+keywords_[i]+"=true;"); }catch(e){ keys_kept.push(keywords_[i]); };
})();
*/
// 'safe' keywords. "void" is left out because it does not have exactly the same
// function as in C++, and "enum" because it is a useless keyword in JS.
// (The commented-out entries carry their own comma so that no array hole is left behind.)
keys_kept=["break","case","catch","class","const","continue","default","delete","do","else",/*"enum",*/"export",
"false","for","if","new","return","switch","this","throw","true","try",/*"void",*/"while"];
// object[property] is much faster than array.indexOf, so mirror every entry as a property
keys_kept.forEach(function(word){ keys_kept[word]=true; });
// C++ alternative operator tokens (the spelled-out operator keywords) and the
// symbolic operator text each one is replaced with in the ported output.
var keys_opr_trant={
  "and":"&&",
  "and_eq":"&=",
  "bitand":"&",
  "bitor":"|",
  "compl":"~", // bitwise complement; was missing from the table
  "not":"!",
  "not_eq":"!=",
  "or":"||",
  "or_eq":"|=",
  "xor":"^",
  "xor_eq":"^="
};
// Type-ish keywords that start (or belong to) a variable declaration.
// Note: every one of these can also be a pointer/array type in C++.
// The porter replaces them with "var" (or keeps "const").
var keys_var=(function(type_words){
  var table={};
  for(var n=0;n<type_words.length;n++){ table[type_words[n]]=true; }
  return table;
})([
  "auto",
  "bool",
  "std::string",
  "string",
  "wchar_t",
  "void",     // listed because it is probably a void pointer or a void returning function
  "nullptr",
  "int",
  "float",
  "double",
  "char32_t",
  "char16_t",
  "char",
  "const",
  "register"  // is this the correct spot for the "register" keyword ?
]);
// cc = "c constructor" keywords, i.e. things instantiated like new ClassName(properties_defined);
var keys_cc_obj={};
["union","class","struct"].forEach(function(kw){ keys_cc_obj[kw]=true; });
// Registry for objects defined by the C++ programmer (individual unions, classes, ...); starts empty.
var keys_udf_obj={};
| // namespace is a special case, it only has one object instance per name at a level | |
| //"typename","typeid","typedef" //whatdido? | |
// Keywords that modify the behavior/type of declared vars/objects.
var keys_varmod = [
  "extern",    // this might be a special case ? (maybe also declare self.varname or self.var_exports.varname when extern is encountered)
  "explicit",
  "friend",    // no - sorry, all properties will be public in this implementation (it's just EASIER and the project is hard enough already)
  "long",
  "unsigned",
  "inline",    // not implemented yet, just ignore (function body code substitution for call)
  "private",
  "protected",
  "public",
  "short",
  "signed",
  "static",
  "override"   // replace function from template class in the current class scope
].reduce(function(table,kw){ table[kw]=true; return table; },{});
| //special: namespace, enum, using, sizeof (it's .length?), operator | |
| //for sizeof: | |
// Size in bytes reported for a JS number (an IEEE-754 double: 64 bits = 8 bytes).
var num_size=8;
/**
 * Rough stand-in for the C++ sizeof operator ("faker :)"). Every branch answers in bytes.
 * Nothing in this file calls it yet; it used to sit inside an if(false) block.
 *
 * @param {*} vAr - value to measure.
 * @returns {?number} byte size guess, or null when no size can be derived:
 *   - number: 8; bigint: 8 (treated as a 64-bit integer)
 *   - typed array: BYTES_PER_ELEMENT * length
 *   - anything with a numeric byteLength (ArrayBuffer, DataView): byteLength
 *   - anything else with a numeric length (plain arrays): length (element count)
 *   - string: 2 bytes per UTF-16 code unit
 *   - function: 2 bytes per code unit of its source text
 */
var cpp_sizeof=function(vAr){
  var _t;
  switch(typeof vAr)
  {
    case "number":
      return num_size;
    case "bigint":
      return 8;
    case "object":
      if(vAr===null) return null; // typeof null is "object"; it has no constructor to inspect
      _t=vAr.constructor&&vAr.constructor.BYTES_PER_ELEMENT;
      if(_t) return _t*vAr.length;
      if(typeof vAr.byteLength=="number") return vAr.byteLength;
      if(typeof vAr.length=="number") return vAr.length;
      return null;
    case "string":
      return vAr.length<<1;
    case "function":
      return vAr.toString().length<<1;
    default:
      return null;
  }
};
| //for keyword "operator", need to build table of overloaded operators as functions, then replace all instances of where they're used with function calls to the overload | |
// The remaining C++ keywords that none of the tables above deal with.
// ("asm" is inline assembly - no idea yet what to do with that one.)
var keys_remandr=(
  "alignas alignof asm compl constexpr const_cast decltype dynamic_cast "+
  "final goto mutable noexcept reinterpret_cast static_assert static_cast "+
  "template thread_local typedef typeid typename virtual volatile"
).split(" ");
| //end of js keyword list section | |
| //cpp & h files in: | |
// codefiles: the most recently loaded files (each with a .textC source string);
// fi: the directory-picker <input> that feeds it.
var codefiles, fi;
/**
 * Reads the text of every selected file and stores it on the file object as
 * .textC, so each entry has a .textC property once the promise resolves.
 * Also publishes the loaded files through the global codefiles.
 *
 * Uses the promise-based .text() reader; .bytes()/.arrayBuffer() would be more
 * correct for ANSI or other non-UTF-8 files, but would need a manual
 * Uint8Array-to-string conversion.
 *
 * @param {ArrayLike<{text: function(): Promise<string>}>} file_list - e.g. input.files.
 * @returns {Promise<Array>} resolves to the file objects, in their original order.
 */
var read_codefiles=function read_codefiles(file_list){
  var files=Array.prototype.slice.call(file_list);
  return Promise.all(files.map(function(file){
    // each callback closes over its own file; a shared loop variable would be
    // overwritten before any of the promises settle
    return file.text().then(function(s){ file.textC=s; return file; });
  })).then(function(loaded){
    codefiles=loaded;
    return loaded;
  });
};
// Only build the picker when a DOM is available, so read_codefiles stays usable without one.
if(typeof document!="undefined"){
  fi=document.createElement("input");
  fi.type="file"; fi.multiple=true;
  fi.setAttribute("webkitdirectory","true");
  fi.onchange=function(){
    read_codefiles(fi.files)["catch"](function(err){
      console.error("could not read the selected files",err);
    });
  };
  document.body.appendChild(fi);
}
| //as you can see, I'm using a c++ syntax highlighter in browser js to have something to build on for parsing the code to ignore strings, comments, > regex < (c++ doesn't have inline regex, it's strings and objects only, it's easier) | |
| // #ifdef ENABLE_HE , ports to: if("undefined"!=typeof ENABLE_HE ){ eval("/*conditional compile code in escaped string or function_unused.toString() formant here*/ | |
| // #define OPCODE(i, x2) _opcodes[i]._OPCODE(ScummEngine_v100he, x2), ports to function OPCODE(i, x2){ return _opcodes[i]._OPCODE(ScummEngine_v100he, x2); }; | |
| // #endif , ports to: "); } or ).toString()); } | |
| //namespace Scumm {, ports to: try{var Scumm=false;}catch(e){ console.log('namespace already declared'); }; Scumm=js_namespace(function namespace_Scumm(){ /*code in namespace till end namespace block bracket here: */ },"Scumm"); | |
| //ARRAYSIZE(array_variable_or_obj) ports to (array_variable_or_obj).length (add a .length polyfill to the prototype of everything that gets ARRAYSIZE'd; in theory they SHOULD all be converted to arrays anyway: plain arrays, pointer-to-pointer sequences that aren't strings, and vectors should all become ordinary JavaScript arrays, except the special cases of byte, char, and the various int type arrays, which should be converted to their respective JavaScript equivalents such as Uint8Array and Float32Array) | |
| //array <int 4> my_aay= { 0, 0, 1, 5 }; ports to: var my_aay=[0,0,1,5]; //sQuIsH XD all variable declaration keywords down to one of: var, let, const. Prefer var in all cases for compatibility | |
| //https://en.cppreference.com/w/c/header/stdint | |
// Conversion data between C/C++ numeric types and JavaScript typed arrays.
// Row 0 is a header naming the columns; every following row is
// [JS typed array name, value range, size in bytes, Web IDL type, C/C++ aliases].
var array_type_conv_data =
[
//int or uint, anything or nothing, then NumOfBits_t, so for a "int_least32_t" type we delete the "_least" section out of the middle to get "int32_t" which then converts to Int32Array, only the uint_fast8_t is special because JS has Uint8ClampedArray? ( W A R N I N G : this "fast8" thing could be a misunderstanding on my part and could lead to unpredictable behavior of ported code)
["JavaScript Type", "Value Range", "Size in bytes", "Web IDL type", "aliases" ],
["Int8Array", "-128 to 127", "1", "byte", ["int8_t"] ],
["Uint8Array", "0 to 255", "1", "octet", ["byte", "char", "uint8_t"] ],
["Uint8ClampedArray", "0 to 255", "1", "octet", ["byte" ,"char", "uint8_t", "uint_fast8_t"] ], //when to use this? //maybe use clamped for uint_fast8_t type?
["Int16Array", "-32768 to 32767", "2", "short", ["short int", "int16_t"] ],
["Uint16Array", "0 to 65535", "2", "unsigned short", ["unsigned short int", "uint16_t", "char16_t"] ],
["Int32Array", "-2147483648 to 2147483647", "4", "long", ["int", "int32_t"] ],
["Uint32Array", "0 to 4294967295", "4", "unsigned long", ["unsigned int", "uint32_t", "char32_t"] ],
["Float16Array", "-65504 to 65504", "2", null, ["short float"] ],
["Float32Array", "-3.4e38 to 3.4e38", "4", "unrestricted float", ["float"] ],
["Float64Array", "-1.8e308 to 1.8e308", "8", "unrestricted double", ["double"] ],
// the fixed-width 64-bit names are listed like the 8/16/32-bit ones above
["BigInt64Array", "-2^63 to 2^63 - 1", "8", "bigint", ["long long int", "long long", "int64_t"] ],
["BigUint64Array", "0 to 2^64 - 1", "8", "bigint", ["long long unsigned int", "unsigned long long int", "unsigned long long", "uint64_t"] ]
];
if(typeof BigInt64Array=="undefined"){ /*load a good polyfill library here*/ }
// Maps a C/C++ (or Web IDL) type name to the JS typed array names that can hold it,
// preferred candidate first. "default" is the fallback for unknown type names.
var cpp_to_js_darray_lookup={"default":["BigInt64Array"]};
/**
 * Builds a lookup like the hard-coded one below from a conversion table.
 * Not called at load time (the hard-coded table is used instead); kept so the
 * table can be regenerated.
 *
 * @param {Array<Array>} conv_table - rows of [JS type name, range, bytes, Web IDL type, aliases];
 *   row 0 is a header and is skipped.
 * @param {Object} [lookup] - object to fill; a new one is created when omitted.
 * @returns {Object} lookup, mapping every alias and Web IDL name to an array of JS type names.
 */
var build_darray_lookup=function build_darray_lookup(conv_table,lookup){
  lookup=lookup||{};
  for(var r=1;r<conv_table.length;r++){
    var row=conv_table[r], js_type=row[0];
    if(!(js_type in globalThis)){
      console.error("array type: "+js_type+" not supported in this browser environment");
      continue;
    }
    // the Web IDL name counts as one more alias
    var names=(row[4]||[]).concat(row[3]?[row[3]]:[]);
    for(var n=0;n<names.length;n++){
      var list=lookup[names[n]]||(lookup[names[n]]=[]);
      if(list.indexOf(js_type)!=-1) continue;
      var first=list[0];
      // a shorter type name, or an unsigned type arriving after a signed one, goes to the front
      if(first&&(first.length>js_type.length||(js_type.indexOf("Uint")!=-1&&first.indexOf("Uint")==-1))) list.unshift(js_type);
      else list.push(js_type);
    }
  }
  return lookup;
};
// hard-coded lookup table
Object.assign(cpp_to_js_darray_lookup,{
  "int8_t":["Int8Array"],
  "byte":["Uint8Array","Int8Array","Uint8ClampedArray"],
  "char":["Uint8Array","Uint8ClampedArray"],
  "uint8_t":["Uint8Array","Uint8ClampedArray"],
  "octet":["Uint8Array","Uint8ClampedArray"],
  "uint_fast8_t":["Uint8ClampedArray"],
  "short int":["Int16Array"],
  "int16_t":["Int16Array"],
  "short":["Int16Array"],
  "unsigned short int":["Uint16Array"],
  "uint16_t":["Uint16Array"],
  "char16_t":["Uint16Array"],
  "unsigned short":["Uint16Array"],
  "int":["Int32Array"],
  "int32_t":["Int32Array"],
  "long":["Int32Array"],
  "unsigned int":["Uint32Array"],
  "uint32_t":["Uint32Array"],
  "char32_t":["Uint32Array"],
  "unsigned long":["Uint32Array"],
  "short float":["Float16Array"],
  "float":["Float32Array"],
  "unrestricted float":["Float32Array"],
  "double":["Float64Array"],
  "unrestricted double":["Float64Array"],
  "long long int":["BigInt64Array"],
  "long long":["BigInt64Array"],
  "int64_t":["BigInt64Array"],
  "bigint":["BigUint64Array","BigInt64Array"],
  "long long unsigned int":["BigUint64Array"],
  "unsigned long long int":["BigUint64Array"],
  "unsigned long long":["BigUint64Array"],
  // without this entry uint64_t would fall back to the signed "default"
  "uint64_t":["BigUint64Array"]
});
| //I'm getting bored, so anything else can just be a Big(U)Int64Array type with an extra .float property tacked on for the decimal point location | |
| /* | |
| Size of int: 4 bytes | |
| Size of char: 1 byte | |
| Size of float: 4 bytes | |
| Size of double: 8 bytes | |
| */ | |
| /* | |
| https://en.cppreference.com/w/cpp/language/types.html | |
| Size: | |
| short — target type will be optimized for space and will have width of at least 16 bits. | |
| long — target type will have width of at least 32 bits. | |
| long long — target type will have width of at least 64 bits. | |
| (since C++11) | |
| Note: as with all type specifiers, any order is permitted: unsigned long long int and long int unsigned long name the same type. | |
| */ | |
| Uint16Array.BYTES_PER_ELEMENT; //this property of all typedarrays will come in handy | |
| //could even make WackyAByteArray(aay_length, aay_treat (0=Uint | 1=int | 2 = float), BYTES_PER_ELEMENT) | |
| //that stores everyting as Uint32Arrays internally, with a custom .toString method | |
| // a <vector> byte or binary data array could be converted to ArrayBuffer | |
| // end of binary data < A R R A Y > section (whew) | |
| // :: and -> both port to . (dot) because EVERYTHING is an object in JavaScript | |
| //Important related script: | |
| //https://github.com/DreamOfTranscendence/jsscummvm/blob/master/src/cpp_handy_js_polyfill.js | |
// Human-readable meanings for the numeric codes stored in a token's (t3) .info and .flag properties.
// The position in the array is the code.
var generic_conv_infos=[];
generic_conv_infos[0]="oopsie";
generic_conv_infos[1]="enum to numeric vars & object properties";
generic_conv_infos[2]="built js enum";
generic_conv_infos[3]="";
generic_conv_infos[4]="unknown item?";
generic_conv_infos[5]="fail";
/**
 * Partially ports C++ source to JavaScript by rewriting the .contents of the
 * tokens produced by cpp_syntax_hi.htmlPrettyPrint(text,2), in place.
 *
 * What gets rewritten:
 *  - "#ifdef NAME" / its matching "#endif"  ->  if("undefined"!=typeof NAME){ ... }
 *    (other preprocessor lines except #define are wrapped in a "not portable yet" comment)
 *  - operator keywords (and, or, ...) via keys_opr_trant; nullptr -> null
 *  - type keywords (keys_var) -> var / const, extra type words are commented out
 *  - enum Name { A, B }  ->  var A = 0,B = 1; var Name = { A:0, B:1 }
 *  - namespace Name { ... }  ->  Name = js_namespace( 'Name' , (function ns_Name(){ ... }));
 *
 * Known gaps: nested namespaces, #define, enum entries with explicit values,
 * struct/union/class bodies.
 *
 * NOTE(review): assumes the tokenizer returns an array of tokens shaped like
 * { type, contents, sym_id, isEnd } with whitespace tokens typed "ws" - that is
 * how the tokens are used below; confirm against the highlighter gist.
 * Relies on the globals cpp_syntax_hi, keys_kept, keys_opr_trant, keys_var and keys_cc_obj.
 *
 * @param {string|Array<{textC: string, name: string}>} cpp_files_a - one source
 *   string, or a list of objects carrying the source in .textC and a file name in .name.
 * @returns {Array<{name: string, data: Array}>} one entry per input file:
 *   name is the input name plus ".js", data is the rewritten token array.
 */
var partlyport=function partiallyPortCpp2js(cpp_files_a){
  // token type ids, copied from the linked highlighter code; only some of them are handled below
  // https://gist.github.com/DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe
  var PREPROCESSOR=0,COMMENT=1,KEYWORD=2,IDENTIFIER=3,CONSTANT=4,STRING=5,SYMBOLS=6,S_SYMBOL=7;
  // t1 = token list of the current file, t3 = current token, ctc = its original contents,
  // t4 = short-lived scratch value (always set right before it is used)
  var js_unfin_files_a_o=[], t1, t3, t4, ctc, i, j, L, L2;
  // memO = per-file parser state; sym_ids = sym_id -> [index of opening token, index of closing token]
  var memO, sym_ids;

  // wraps a preprocessor line we cannot port in a comment and flags the token as failed
  var comment_out=function(tok,text){
    tok.contents = ("/* //not portable yet :(\n" + text.replace(/\*\//g,"*/ /*") + "*/");
    tok.flag=5;
  };

  if(typeof cpp_files_a=="string"){ cpp_files_a=[{"textC":cpp_files_a,"name":"untitled.js"}]; }

  for(i=0,L=cpp_files_a.length;i!=L;i++){
    t1=cpp_syntax_hi.htmlPrettyPrint(cpp_files_a[i].textC,2);
    sym_ids={};
    // prep_ifs: one entry per open #if-like directive, true when it was ported to a JS if(){
    memO={ prep_ifs:[] };
    for(j=0,L2=t1.length;j!=L2;j++){
      t3=t1[j];
      ctc=t3.contents;
      switch(t3.type){
        case PREPROCESSOR:
          if(ctc.indexOf("#ifdef ")==0){
            // typeof keeps the test from throwing when the name was never declared
            t3.contents = 'if("undefined"!=typeof '+ctc.substr(7)+'){ /* #ifdef */';
            memO.prep_ifs.push(true);
          }else if(ctc.indexOf("#endif")==0){
            // only close with "}" when the matching opener really became a JS if(){
            if(memO.prep_ifs.pop()) t3.contents="} /* #endif */";
            else comment_out(t3,ctc);
          }else if(ctc.indexOf("#define")==0){
            /* TODO: port define */
          }
          else{
            if(ctc.indexOf("#if")==0) memO.prep_ifs.push(false); // #if / #ifndef: remember that its #endif is not ours
            comment_out(t3,ctc);
          }
          break; //end PREPROCESSOR

        case S_SYMBOL: // paired symbols: remember where each pair opens and closes
          if( !(t3.sym_id in sym_ids) ) sym_ids[t3.sym_id]=[];
          sym_ids[t3.sym_id][t3.isEnd&1]=j;
          if( memO.in_enum ){
            // memO.in_enum collects "name = index" strings until the closing bracket of the enum
            // appears; then the declarations are appended to the token that started the enum.
            // W A R N I N G : do not .splice tokens here, it would shift the j indexes stored elsewhere
            if(memO.in_enum.length==0 && "{" == ctc && (!memO.enum_par) ){ memO.enum_par=[j]; }
            else if(memO.enum_par && "}" == ctc && (t4=sym_ids[t3.sym_id]) && t4[0] == memO.enum_par[0] ){
              // build js enum inst
              t3.info=2;
              // an empty enum has nothing to declare (" var ;" would not parse)
              if(memO.in_enum.length) t1[memO.enum_bgn].contents+=" var "+memO.in_enum.join(",")+";";
              console.log("finished enum, tokens "+memO.enum_bgn+" - "+j);
              memO.enum_par = ( memO.in_enum = ( memO.enum_bgn = false ) ); //clear enum temp workspace
            }
          } //end in_enum
          else if(memO.in_ns)
          {
            if( "{" == ctc && memO.ns_id && !memO.ns_par )
            { // only the first bracket after the namespace name opens the namespace body
              memO.ns_par=[j];
              t3.contents=" (function ns_"+memO.ns_id[1]+"(){";
            }
            else if( "}" == ctc && memO.ns_par && t1[memO.ns_par[0]].sym_id == t3.sym_id )
            { // closes both the function expression and the js_namespace( call
              t3.contents += "));";
              t3.info="end of namespace block for "+memO.ns_id[1];
              memO.in_ns = ( memO.ns_id = ( memO.ns_par = false ) ); //clear namespace temp workspace
            }
          } //end in_namespace
          // TODO: struct/union/class bodies (see memO.bgn_udo) are not handled yet
          break;

        case KEYWORD:
          if(!keys_kept[ctc]){ //keywords that are identical in JS stay as they are
            //c++ "union" is just an object
            //c++ "using" js: for(var p in Using_object) eval("var p = "+Using_object_name+"['"+p+"'];");
            if(t4=keys_opr_trant[ctc]) t3.contents = t4; //logic operation keyword
            else if(ctc=="nullptr"){
              // a value, not a type: must not be turned into "var" by the keys_var branch
              t3.contents="null"; t3.info="was: "+ctc;
            }
            else if(ctc=="enum"){
              t3.contents = "/*enum*/"; t3.info=1;
              memO.enum_bgn=j;
              memO.in_enum=[];
            } //end if enum
            else if(keys_var[ctc]){
              t4=memO.L_kvar; memO.L_kvar=[j];
              if(t4)
              { // a type keyword was already seen for this declaration
                t3.contents="/* "+ctc+" */";
                if(ctc=="const") t1[memO.var_dc_bgn].contents=ctc;
              }
              else
              { // first type keyword of the declaration
                t3.contents=ctc=="const"?ctc:"var"; t3.info="was: "+ctc;
                memO.var_dc_bgn=j;
              }
            } //end if keys_var[ctc]
            else if(keys_cc_obj[ctc])
            {
              //object type, such as struct or similar
              memO.bgn_udo=[j]; //begin user-defined object
            } //end if keys_cc_obj[ctc]
            else if(ctc=="namespace")
            {
              // nested namespaces are not handled yet
              memO.in_ns=[j];
              t3.contents="/* namespace */";
            } //end if namespace
          } // end of if not kept
          break; //end of KEYWORD

        case IDENTIFIER:
          if( memO.in_enum )
          {
            if( memO.enum_par )
            { //inside the enum code block: number the entries starting at 0
              t4=memO.in_enum.length;
              t3.contents=ctc+":"+t4;
              memO.in_enum.push(ctc+" = "+t4);
            }else{
              t3.info="is this the correct enum identifier?";
              memO.idt_p= [ ctc, j ];
              t3.contents="var "+ctc+" = ";
            }
          } //end of if in_enum
          else if(memO.in_ns)
          { //inside namespace declaration: the first identifier is the namespace name
            if(!memO.ns_id)
            {
              memO.ns_id=[j,ctc];
              t3.contents="try{ var "+ctc+" ; }catch(e){ console.log('namespace "+ctc+" already declared, remove try statement?'); }; "+ctc+" = js_namespace( '"+ctc+"' , ";
            }
          } //end of in_ns
          break;

        default: t3.flag=4; // keep t3.contents
      } //end switch t3.type
      // anything other than whitespace or another keyword ends the run of type keywords
      if( t3.type != "ws" && t3.type != KEYWORD && memO.L_kvar ){ memO.var_dc_bgn=(memO.L_kvar=false); }
    } //end token loop
    js_unfin_files_a_o[i]= { "name":cpp_files_a[i].name+".js", "data":t1 } ;
  } //end file loop
  return js_unfin_files_a_o;
};
| //ugh, C++ is too versatile to port, should I try porting assembly instead? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment