Last active
March 31, 2016 18:07
-
-
Save jordanbtucker/2b30c2f2dfafb2961f5e to your computer and use it in GitHub Desktop.
Generate the regular expressions for ECMAScript 5.1 identifiers and whitespace
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var identifierStart = /[$A-Z_\xaa\xba\xc0-\xd6\xd8-\xdf\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130-\u0132\u0134\u0136\u0138\u0139\u013b\u013d\u013f\u0141\u0143\u0145\u0147\u0149\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178\u0179\u017b\u017d\u017f\u0181\u0182\u0184\u0186\u0187\u0189-\u018b\u018d-\u0191\u0193\u0194\u0196-\u0198\u019b-\u019d\u019f\u01a0\u01a2\u01a4\u01a6\u01a7\u01a9-\u01ac\u01ae\u01af\u01b1-\u01b3\u01b5\u01b7\u01b8\u01ba-\u01bc\u01be\u01c0-\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f0\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220-\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u0234-\u023b\u023d\u023e\u0241\u0243-\u0246\u0248\u024a\u024c\u024e\u0255\u0258\u025a\u025c-\u025f\u0261\u0262\u0264\u0267\u026a\u026c-\u026e\u0270\u0273\u0274\u0276-\u027c\u027e\u027f\u0281\u0282\u0284-\u0287\u028d-\u0291\u0293-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370\u0372\u0374\u0376\u037a\u037f\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03ab\u03b0\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f3\u03f4\u03f7\u03f9\u03fa\u03fc-\u042f\u0460\u0462\u0464\u0466\u0468\u046a\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0\u04c1\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u051
0\u0512\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0524\u0526\u0528-\u052f\u0531-\u0556\u0559\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0-\u08b2\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0980\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f8\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u
1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191e\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1d78\u1d7a-\u1d7c\u1d7e-\u1dbf\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\u1e46\u1e48\u1e4a\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e96-\u1e9a\u1e9c-\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f50\u1f52\u1f54\u1f56\u1f59\u1f5b\u1f5d\u1f5f\u1f68-\u1f6f\u1f80-\u1faf\u1fb2-\u1fb4\u1fb6-\u1fbc\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd2\u1fd3\u1fd6-\u1fdb\u1fe2-\u1fe4\u1fe6-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u2160-\u216f\u2180-\u2183\u2185-\u2188\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c72\u2c74\u2c75\u2c77-\u2c80\u2c82\u2c84\u2c86\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\u2ce4\u2ceb\u2ced\u2cf2\u2d30-\u2d67
\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400\u4db5\u4e00\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640\ua642\ua644\ua646\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a\ua65c\ua65e\ua660\ua662\ua664\ua666\ua668\ua66a\ua66c\ua66e\ua67f\ua680\ua682\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696\ua698-\ua69d\ua6a0-\ua6ef\ua717-\ua71f\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua730-\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua770-\ua779\ua77b\ua77d\ua77e\ua780\ua782\ua784\ua786\ua788\ua78b\ua78d\ua78e\ua790\ua792\ua794-\ua7a0\ua7a2\ua7a4\ua7a6\ua7a8\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\ua9e0-\ua9e4\ua9e6-\ua9ef\ua9fa-\ua9fe\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa7e-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uabc0-\uabe2\uac00\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc]/i; | |
var identifierPart = /[\w\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u064b-\u0669\u0670\u06d6-\u06dc\u06df-\u06e4\u06e7\u06e8\u06ea-\u06ed\u06f0-\u06f9\u0711\u0730-\u074a\u07a6-\u07b0\u07c0-\u07c9\u07eb-\u07f3\u0816-\u0819\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0859-\u085b\u08e4-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09cb-\u09cd\u09d7\u09e2\u09e3\u09e6-\u09ef\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b62\u0b63\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c00-\u0c03\u0c3e-\u0c44\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62\u0c63\u0c66-\u0c6f\u0c81-\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2\u0ce3\u0ce6-\u0cef\u0d01-\u0d03\u0d3e-\u0d44\u0d46-\u0d48\u0d4a-\u0d4d\u0d57\u0d62\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0de6-\u0def\u0df2\u0df3\u0e31\u0e34-\u0e3a\u0e47-\u0e4e\u0e50-\u0e59\u0eb1\u0eb4-\u0eb9\u0ebb\u0ebc\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f3e\u0f3f\u0f71-\u0f84\u0f86\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u102b-\u103e\u1040-\u1049\u1056-\u1059\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u17b4-\u17d3\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u18a9\u1920-\u192b\u1930-\u193b\u1946-\u194f\u19b0-\u19c0\u19c8\u19c9\u19d0-\u19d9\u1a17-\u1a1b\u1a55-\u1a5e\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1ab0-\u1abd\u1b00-\u1b04\u1b34-\u1b44\u1b50-\u1b59\u1b6b-\u1b73\u1b80-\u1b82\u1ba1-\u1bad\u1bb0-\u1bb9\u1be6-\u1bf3\u1c24-\u1c37\u1c40-\u1c49\u1c50-\u1c59\u1cd0-\u1cd2\u1cd4-\u1ce8\u1ced\u1cf2-\u1cf4\u1cf8\u1cf9\u1dc0-\u1df5\u1dfc-\u1dff\u200c\u200d
\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2cef-\u2cf1\u2d7f\u2de0-\u2dff\u302a-\u302f\u3099\u309a\ua620-\ua629\ua66f\ua674-\ua67d\ua69f\ua6f0\ua6f1\ua802\ua806\ua80b\ua823-\ua827\ua880\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8e0-\ua8f1\ua900-\ua909\ua926-\ua92d\ua947-\ua953\ua980-\ua983\ua9b3-\ua9c0\ua9d0-\ua9d9\ua9e5\ua9f0-\ua9f9\uaa29-\uaa36\uaa43\uaa4c\uaa4d\uaa50-\uaa59\uaa7b-\uaa7d\uaab0\uaab2-\uaab4\uaab7\uaab8\uaabe\uaabf\uaac1\uaaeb-\uaaef\uaaf5\uaaf6\uabe3-\uabea\uabec\uabed\uabf0-\uabf9\ufb1e\ufe00-\ufe0f\ufe20-\ufe2d\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f]/i; | |
var whitespace = /\s/; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Collects the UTF-16 code unit of the first character of every entry in
 * `chars`, dropping repeats while keeping first-seen order.
 *
 * @param {string[]|string} chars - Indexable list of strings.
 * @returns {number[]} Distinct char codes in order of first appearance.
 */
var getCharCodes = function(chars) {
    var distinct = [];
    var position = 0;
    while(position < chars.length) {
        var unit = chars[position].charCodeAt(0);
        position += 1;
        if(distinct.indexOf(unit) >= 0) {
            continue;
        }
        distinct.push(unit);
    }
    return distinct;
};
var _unicodeCategories = null; | |
var getUnicodeCatagories = function(callback) { | |
if(_unicodeCategories) { | |
callback(null, _unicodeCategories); | |
} | |
else { | |
var categories = {}; | |
require('http').get('http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt', function(res) { | |
res.setEncoding('utf8'); | |
var data = ''; | |
res.on('data', function(d) { data += d; }); | |
res.on('end', function() { | |
var lines = data.split('\n'); | |
for(var i = 0; i < lines.length; i++) { | |
if(!lines[i]) | |
continue; | |
var columns = lines[i].split(';', 4); | |
var code = parseInt(columns[0], 16); | |
var category = columns[2]; | |
if(!categories[category]) { | |
categories[category] = []; | |
} | |
categories[category].push(code); | |
} | |
_unicodeCategories = categories; | |
callback(null, categories); | |
}); | |
}); | |
} | |
}; | |
/**
 * Gathers every Basic Multilingual Plane code point (<= 0xFFFF) belonging to
 * the given Unicode general categories.
 *
 * @param {string[]} categories - General category names, e.g. ['Lu', 'Nd'].
 * @param {function(?Error, number[]=)} callback - Receives `(null, codePoints)`
 *     in category order, or `(err)` when the category table could not be
 *     loaded or a requested category does not exist in it.
 */
var getUnicodeCodePoints = function(categories, callback) {
    getUnicodeCatagories(function(err, unicode) {
        if(err) {
            callback(err);
            return;
        }
        var codePoints = [];
        for(var i = 0; i < categories.length; i++) {
            var members = unicode[categories[i]];
            if(!members) {
                // Report a bad name through the callback instead of crashing
                // on `undefined.length`.
                callback(new Error('Unknown Unicode general category: ' + categories[i]));
                return;
            }
            for(var j = 0; j < members.length; j++) {
                // Code points above the BMP are skipped.
                if(members[j] <= 0xffff) {
                    codePoints.push(members[j]);
                }
            }
        }
        callback(null, codePoints);
    });
};
/**
 * Returns the char codes whose character is NOT matched by `regex`.
 *
 * @param {RegExp} regex - Pattern tested against each single character.
 * @param {number[]} charCodes - Candidate char codes.
 * @returns {number[]} New array holding only the unmatched codes, in order.
 */
var removeCharCodes = function(regex, charCodes) {
    return charCodes.filter(function(candidate) {
        var character = String.fromCharCode(candidate);
        return regex.test(character) === false;
    });
};
/**
 * Formats a char code as a `\uXXXX` escape by left-padding its lowercase hex
 * digits with zeros to four places.
 *
 * @param {number} charCode - Char code to format.
 * @returns {string} Escape text such as `\u000a` or `\u1234`.
 */
var getUnicodeEscape = function(charCode) {
    var digits = charCode.toString(16);
    // The template is backslash + 'u' + three zeros; keep only as much of it
    // as is needed to bring the total length to six characters.
    var prefixLength = Math.max(0, 6 - digits.length);
    var prefix = '\\u000'.slice(0, prefixLength);
    return prefix + digits;
};
/**
 * Drops every char code that a case-insensitive regex treats as equal to an
 * earlier code in the list, so only the first member of each case group
 * survives. Progress (as a percentage) is logged every 0xff entries.
 *
 * @param {number[]} charCodes - Char codes to thin out.
 * @returns {number[]} New array with later case-equivalents removed.
 */
var removeSameCaseCharCodes = function(charCodes) {
    var kept = [];
    var skipped = {};
    var total = charCodes.length;
    for(var index = 0; index < total; index += 1) {
        if(index % 0xff === 0) {
            console.log(index / total * 100);
        }
        var current = charCodes[index];
        if(skipped[current] === true) {
            continue;
        }
        kept.push(current);
        // A one-character class with the `i` flag matches exactly the
        // characters the regex engine considers case-equal to `current`.
        var matcher = new RegExp('[' + getUnicodeEscape(current) + ']', 'i');
        for(var later = index + 1; later < total; later += 1) {
            var candidate = charCodes[later];
            if(matcher.test(String.fromCharCode(candidate))) {
                skipped[candidate] = true;
            }
        }
    }
    return kept;
};
/**
 * Returns a sorted copy of `array`, ordering elements with the `<` / `>`
 * operators. The input array is left untouched.
 *
 * @param {Array} array - Values to sort.
 * @returns {Array} New array in ascending order.
 */
var comparativeSort = function(array) {
    var ascending = function(left, right) {
        if(left > right) {
            return 1;
        }
        if(left < right) {
            return -1;
        }
        return 0;
    };
    return array.slice(0).sort(ascending);
};
/**
 * Collapses runs of three or more consecutive values in a sorted,
 * duplicate-free list into `[first, last]` pairs. Shorter runs are left as
 * individual values.
 *
 * @param {number[]} sortedSet - Ascending values without duplicates.
 * @returns {Array.<number|number[]>} Mix of single values and [first, last] pairs.
 */
var rangeify = function(sortedSet) {
    var output = [];
    var total = sortedSet.length;
    var start = 0;
    while(start < total) {
        var first = sortedSet[start];
        // In a sorted set, "the element two places on is exactly two larger"
        // means the three elements are consecutive.
        var opensRun = start < total - 2 && sortedSet[start + 2] === first + 2;
        if(!opensRun) {
            output.push(first);
            start += 1;
            continue;
        }
        // Extend the run for as long as the next value is one larger.
        var end = start + 2;
        while(end < total - 1 && sortedSet[end + 1] === sortedSet[end] + 1) {
            end += 1;
        }
        output.push([first, sortedSet[end]]);
        start = end + 1;
    }
    return output;
};
// Characters that are special inside a character class and are escaped by
// prefixing them with a backslash.
var regexEscapes1 = [
    '\\',
    '^',
    '-',
    ']',
];
// Control characters that have a dedicated single-letter escape. Inside a
// character class `\b` means backspace. NUL is deliberately absent: see
// getRegexEscape.
var regexEscapes2 = {
    '\t': 't',
    '\r': 'r',
    '\n': 'n',
    '\v': 'v',
    '\f': 'f',
    '\b': 'b',
};

/**
 * Returns the text that represents `charCode` inside a regex character class.
 *
 * @param {number} charCode - Integer UTF-16 code unit, 0x0000-0xFFFF.
 * @returns {string} A literal character or an escape sequence.
 * @throws {RangeError} If `charCode` is not an integer in 0x0000-0xFFFF
 *     (previously such input silently produced `undefined` or a malformed
 *     escape that was then concatenated into the pattern).
 */
var getRegexEscape = function(charCode) {
    if(typeof charCode !== 'number' || charCode % 1 !== 0 || charCode < 0 || charCode > 0xFFFF) {
        throw new RangeError('Char code out of range (0x0000-0xFFFF): ' + charCode);
    }
    if(charCode === 0) {
        // Not `\0`: when followed by a literal digit that would be read as a
        // legacy octal escape and swallow the digit.
        return '\\x00';
    }
    var char = String.fromCharCode(charCode);
    if(regexEscapes1.indexOf(char) >= 0) {
        return '\\' + char;
    }
    if(regexEscapes2[char] != null) {
        return '\\' + regexEscapes2[char];
    }
    if(charCode >= 0x01 && charCode <= 0x1A) {
        // Control-letter escape: \cA is 0x01 ... \cZ is 0x1A.
        return '\\c' + String.fromCharCode(charCode + 0x40);
    }
    if(charCode >= 0x20 && charCode <= 0x7E) {
        return char;
    }
    var hex = charCode.toString(16);
    if(charCode <= 0xFF) {
        // Only 0x1B-0x1F and 0x7F-0xFF reach this point, so `hex` always has
        // two digits.
        return '\\x' + hex;
    }
    // 0x100-0xFFFF: pad three-digit values to the four digits `\u` requires.
    return '\\u' + (hex.length < 4 ? '0' : '') + hex;
};
/**
 * Builds the source text of a regex character class covering the given
 * characters and Unicode general categories.
 *
 * @param {?RegExp} source - Optional base pattern. Char codes it already
 *     matches are left out, and its source text is placed at the start of the
 *     class.
 * @param {?string[]} chars - Optional explicit characters to include.
 * @param {?string[]} categories - Optional Unicode general category names.
 * @param {function(?Error, string=)} callback - Receives `(null, pattern)`
 *     where pattern looks like `[...]`, or `(err)` when the code points could
 *     not be obtained.
 */
var getRegex = function(source, chars, categories, callback) {
    // A function declaration is hoisted together with its body, so it is
    // callable from the synchronous paths below. The previous `var` function
    // expression was still undefined when reached without an async hop.
    function buildPattern(charCodes) {
        console.log('Removing char codes matching source...');
        if(source) {
            charCodes = removeCharCodes(source, charCodes);
        }
        console.log('Sorting char codes...');
        charCodes = comparativeSort(charCodes);
        console.log('Rangeifying char codes...');
        charCodes = rangeify(charCodes);
        console.log('Building regex...');
        var pattern = '';
        if(source) {
            pattern = source.source;
        }
        for(var i = 0; i < charCodes.length; i++) {
            var charCode = charCodes[i];
            if(charCode instanceof Array) {
                // [first, last] pair produced by rangeify.
                pattern += getRegexEscape(charCode[0]);
                pattern += '-';
                pattern += getRegexEscape(charCode[1]);
                continue;
            }
            pattern += getRegexEscape(charCode);
        }
        callback(null, '[' + pattern + ']');
    }

    var charCodes = [];
    console.log('Getting char codes...');
    if(chars) {
        charCodes = getCharCodes(chars);
    }
    console.log('Getting unicode code points...');
    if(!categories) {
        buildPattern(charCodes);
        return;
    }
    getUnicodeCodePoints(categories, function(err, codePoints) {
        if(err) {
            callback(err);
            return;
        }
        buildPattern(charCodes.concat(codePoints));
    });
};
// One entry per regex to generate:
//   source     - optional base pattern whose matches are not repeated and
//                whose source text starts the character class,
//   chars      - optional explicit characters to include,
//   categories - Unicode general categories to include.
var configs = {
    identifierStart: {
        source: null,
        chars: ['$', '_'],
        categories: ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'],
    },
    identifierPart: {
        source: /\w/,
        chars: ['\u200C', '\u200D'],
        categories: ['Mn', 'Mc', 'Nd', 'Pc'],
    },
    whitespace: {
        source: /\s/,
        chars: null,
        categories: ['Zs'],
    }
};
var regexes = {};
var keyCount = Object.keys(configs).length;
var count = 0;
var code = '';
// Build every configured regex; once the last one has arrived, print the
// generated declarations and write them to es5-regexes.js.
for(var key in configs) {
    console.log('Building regex for ' + key + '...');
    (function(key, config) {
        getRegex(config.source, config.chars, config.categories, function(err, pattern) {
            if(err) {
                // A failed regex never counts towards `keyCount`, so no
                // partial or `/undefined/` output file is written.
                console.error('Failed to build regex for ' + key + ': ' + err.message);
                process.exitCode = 1;
                return;
            }
            regexes[key] = pattern;
            if(++count !== keyCount) {
                return;
            }
            // Emit in `configs` order rather than callback-completion order
            // so that the generated file is deterministic.
            Object.keys(configs).forEach(function(name) {
                var line = 'var ' + name + ' = /' + regexes[name] + '/;';
                code += line + '\n';
                console.log();
                console.log(line);
            });
            require('fs').writeFileSync('es5-regexes.js', code);
        });
    })(key, configs[key]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment