Last active
April 19, 2022 21:38
-
-
Save davidhcefx/74950e34dfb8a5cea38bf0212c144769 to your computer and use it in GitHub Desktop.
Generate mappings from a language to its corresponding script name and dependencies for highlight.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Generate mappings from a language to its corresponding script name and
dependencies for highlight.js. Tool written by davidhcefx, 2020.8.24.
In highlight.js, code is highlighted via <pre><code class="language"></code></pre>.
However, what is the corresponding JS script name for a specific language?
What are the dependencies, if any, in order to use that language?
This tool aims to build these relationships by parsing the source files.
""" | |
from typing import List, Dict | |
import re | |
from os import listdir, path | |
import requests | |
# Path to the language definition directory inside a local checkout of the
# highlight.js repo (relative to the current working directory).
LANG = './highlight.js/src/languages/'
# Default languages in the pre-built CDN (https://highlightjs.org/download/).
# Starts empty; is_in_default() fills it in the first time it is called.
CDN_DEFAULTS: List[str] = []
def is_in_default(fname: str) -> bool:
    """Tell whether ``fname`` is listed among the pre-built CDN defaults.

    The list of defaults is cached in the module-level ``CDN_DEFAULTS``. While
    that cache is empty, the highlight.js download page is fetched and every
    ``name="..."`` attribute found in the first ``<ul>`` after
    ``id="download-form"`` is appended to the cache.

    Args:
        fname: Name to look up; compared verbatim against the cached names.

    Returns:
        True if ``fname`` is one of the cached names, otherwise False.

    Raises:
        AssertionError: If the HTTP request does not succeed, or the expected
            section cannot be located in the returned HTML.
    """
    # Fast path: the cache is already populated, no network access needed.
    if CDN_DEFAULTS:
        return fname in CDN_DEFAULTS

    section_re = re.compile(r'id="download-form".+?<ul>(.+?)</ul>', re.DOTALL)
    attr_re = re.compile(r'name="([^"]+)"')

    response = requests.get('https://highlightjs.org/download/', timeout=5)
    assert response.ok, "Request failed"

    section = section_re.search(response.text)
    assert section is not None, 'Cannot find "Common" section in the HTML'

    # One candidate per list item; items without a name attribute are skipped.
    hits = (attr_re.search(item) for item in section.group(1).split('<li>'))
    CDN_DEFAULTS.extend(hit.group(1) for hit in hits if hit)

    return fname in CDN_DEFAULTS
def main() -> None:
    """Print the highlight.js language/script and script/dependency mappings.

    Every regular file in ``LANG`` that is not a CDN default is scanned:

    * the file name without ``.js`` and every string in the file's
      ``aliases: [...]`` array are mapped to the file name;
    * the comma-separated entries of the file's ``Requires:`` line that are
      not CDN defaults themselves are recorded as its dependencies.

    The two mappings are printed. If every recorded script has exactly one
    dependency, a flattened ``script -> dependency`` mapping is printed too.
    Finally the number of default languages is printed.

    Raises:
        AssertionError: If a scanned file does not end in ``.js``, or if it
            contains ``aliases:`` / ``Requires:`` text that the corresponding
            pattern fails to match.
    """
    # Local import so that this function does not depend on the file's
    # top-level import block being changed.
    from ast import literal_eval

    libname: Dict[str, str] = {}           # libname for non-default languages
    dependency: Dict[str, List[str]] = {}  # non-default dependencies
    alias_pat = re.compile(r'\baliases:\s*(\[\s*(("[^"]+"|\'[^\']+\'),?\s*)+\])', re.DOTALL)
    require_pat = re.compile(r'\bRequires:\s*(.+?)\n', re.DOTALL)

    # listdir() gives no ordering guarantee; sorting makes the printed output
    # reproducible, including which file wins when two files share an alias.
    for fname in sorted(listdir(LANG)):
        fpath = path.join(LANG, fname)
        # don't generate mappings for default languages
        if is_in_default(fname) or not path.isfile(fpath):
            continue
        assert fname.endswith('.js'), f'Error: {fname}'
        libname[fname[:-3]] = fname  # adds the original name

        # Close the file deterministically and do not rely on the platform's
        # default encoding (assumes the highlight.js sources are UTF-8 --
        # TODO confirm).
        with open(fpath, encoding='utf-8') as src:
            data = src.read()

        # search for aliases
        match = alias_pat.search(data)
        assert ('aliases:' in data) == (match is not None), f'Error: {fname}'
        if match:
            # The matched text is an array of quoted strings, which is also a
            # valid Python list literal. literal_eval() parses it without
            # executing arbitrary code, unlike eval().
            for alias in literal_eval(match.group(1)):
                # An alias already claimed by an earlier file is overwritten.
                libname[alias] = fname

        # search for non-default dependencies
        match = require_pat.search(data)
        assert ('Requires:' in data) == (match is not None), f'Error: {fname}'
        if match:
            dep_list = [name
                        for name in map(str.strip, match.group(1).split(','))
                        if not is_in_default(name)]
            if dep_list:
                dependency[fname] = dep_list

    print(libname)
    """
    {'1c': '1c.js', 'abnf': 'abnf.js', 'accesslog': 'accesslog.js', 'actionscript': 'actionscript.js', 'as': 'actionscript.js', 'ada': 'ada.js', 'angelscript': 'angelscript.js', 'asc': 'angelscript.js', 'apache': 'apache.js', 'apacheconf': 'apache.js', 'applescript': 'applescript.js', 'osascript': 'applescript.js', 'arcade': 'arcade.js', 'arduino': 'arduino.js', 'armasm': 'armasm.js', 'arm': 'armasm.js', 'asciidoc': 'asciidoc.js', 'adoc': 'asciidoc.js', 'aspectj': 'aspectj.js', 'autohotkey': 'autohotkey.js', 'ahk': 'autohotkey.js', 'autoit': 'autoit.js', 'avrasm': 'avrasm.js', 'awk': 'awk.js', 'axapta': 'axapta.js', 'x++': 'axapta.js', 'basic': 'basic.js', 'bnf': 'bnf.js', 'brainfuck': 'brainfuck.js', 'bf': 'brainfuck.js', 'cal': 'cal.js', 'capnproto': 'capnproto.js', 'capnp': 'capnproto.js', 'ceylon': 'ceylon.js', 'clean': 'clean.js', 'icl': 'clean.js', 'dcl': 'clean.js', 'clojure-repl': 'clojure-repl.js', 'clojure': 'clojure.js', 'clj': 'clojure.js', 'edn': 'clojure.js', 'cmake': 'cmake.js', 'cmake.in': 'cmake.js', 'coffeescript': 'coffeescript.js', 'coffee': 'coffeescript.js', 'cson': 'coffeescript.js', 'iced': 'coffeescript.js', 'coq': 'coq.js', 'cos': 'cos.js', 'cls': 'cos.js', 'crmsh': 'crmsh.js', 'crm': 'crmsh.js', 'pcmk': 'crmsh.js', 'crystal': 'crystal.js', 'cr': 'crystal.js', 'csp': 'csp.js', 'd': 'd.js', 'dart': 'dart.js', 'delphi': 'delphi.js', 'dpr': 'delphi.js', 'dfm': 'delphi.js', 'pas': 'delphi.js', 'pascal': 'delphi.js', 'django': 'django.js', 'jinja': 'django.js', 'dns': 'dns.js', 'bind': 'dns.js', 'zone': 'dns.js', 'dockerfile': 'dockerfile.js', 'docker': 'dockerfile.js', 'dos': 'dos.js', 'bat': 'dos.js', 'cmd': 'dos.js', 'dsconfig': 'dsconfig.js', 'dts': 'dts.js', 'dust': 'dust.js', 'dst': 'dust.js', 'ebnf': 'ebnf.js', 'elixir': 'elixir.js', 'ex': 'elixir.js', 'exs': 'elixir.js', 'elm': 'elm.js', 'erb': 'erb.js', 'erlang-repl': 'erlang-repl.js', 'erlang': 'erlang.js', 'erl': 'erlang.js', 'excel': 'excel.js', 'xlsx': 'excel.js', 'xls': 'excel.js',
    'fix': 'fix.js', 'flix': 'flix.js', 'fortran': 'fortran.js', 'f90': 'fortran.js', 'f95': 'fortran.js', 'fsharp': 'fsharp.js', 'fs': 'fsharp.js', 'f#': 'fsharp.js', 'gams': 'gams.js', 'gms': 'gams.js', 'gauss': 'gauss.js', 'gss': 'gauss.js', 'gcode': 'gcode.js', 'nc': 'gcode.js', 'gherkin': 'gherkin.js', 'feature': 'gherkin.js', 'glsl': 'glsl.js', 'gml': 'gml.js', 'golo': 'golo.js', 'gradle': 'gradle.js', 'graphql': 'graphql.js', 'gql': 'graphql.js', 'groovy': 'groovy.js', 'haml': 'haml.js', 'handlebars': 'handlebars.js', 'hbs': 'handlebars.js', 'html.hbs': 'handlebars.js', 'html.handlebars': 'handlebars.js', 'htmlbars': 'handlebars.js', 'haskell': 'haskell.js', 'hs': 'haskell.js', 'haxe': 'haxe.js', 'hx': 'haxe.js', 'hsp': 'hsp.js', 'http': 'http.js', 'https': 'http.js', 'hy': 'hy.js', 'hylang': 'hy.js', 'inform7': 'inform7.js', 'i7': 'inform7.js', 'irpf90': 'irpf90.js', 'isbl': 'isbl.js', 'jboss-cli': 'jboss-cli.js', 'wildfly-cli': 'jboss-cli.js', 'julia-repl': 'julia-repl.js', 'jldoctest': 'julia-repl.js', 'julia': 'julia.js', 'lasso': 'lasso.js', 'ls': 'livescript.js', 'lassoscript': 'lasso.js', 'latex': 'latex.js', 'tex': 'latex.js', 'ldif': 'ldif.js', 'leaf': 'leaf.js', 'lisp': 'lisp.js', 'livecodeserver': 'livecodeserver.js', 'livescript': 'livescript.js', 'llvm': 'llvm.js', 'lsl': 'lsl.js', 'mathematica': 'mathematica.js', 'mma': 'mathematica.js', 'wl': 'mathematica.js', 'matlab': 'matlab.js', 'maxima': 'maxima.js', 'mel': 'mel.js', 'mercury': 'mercury.js', 'm': 'mercury.js', 'moo': 'mercury.js', 'mipsasm': 'mipsasm.js', 'mips': 'mipsasm.js', 'mizar': 'mizar.js', 'mojolicious': 'mojolicious.js', 'monkey': 'monkey.js', 'moonscript': 'moonscript.js', 'moon': 'moonscript.js', 'n1ql': 'n1ql.js', 'nestedtext': 'nestedtext.js', 'nt': 'nestedtext.js', 'nginx': 'nginx.js', 'nginxconf': 'nginx.js', 'nim': 'nim.js', 'nix': 'nix.js', 'nixos': 'nix.js', 'node-repl': 'node-repl.js', 'nsis': 'nsis.js', 'ocaml': 'ocaml.js', 'ml': 'sml.js', 'openscad': 'openscad.js',
    'scad': 'openscad.js', 'oxygene': 'oxygene.js', 'parser3': 'parser3.js', 'pf': 'pf.js', 'pf.conf': 'pf.js', 'pgsql': 'pgsql.js', 'postgres': 'pgsql.js', 'postgresql': 'pgsql.js', 'pony': 'pony.js', 'powershell': 'powershell.js', 'pwsh': 'powershell.js', 'ps': 'powershell.js', 'ps1': 'powershell.js', 'processing': 'processing.js', 'pde': 'processing.js', 'profile': 'profile.js', 'prolog': 'prolog.js', 'properties': 'properties.js', 'protobuf': 'protobuf.js', 'puppet': 'puppet.js', 'pp': 'puppet.js', 'purebasic': 'purebasic.js', 'pb': 'purebasic.js', 'pbi': 'purebasic.js', 'q': 'q.js', 'k': 'q.js', 'kdb': 'q.js', 'qml': 'qml.js', 'qt': 'qml.js', 'reasonml': 'reasonml.js', 're': 'reasonml.js', 'rib': 'rib.js', 'roboconf': 'roboconf.js', 'graph': 'roboconf.js', 'instances': 'roboconf.js', 'routeros': 'routeros.js', 'mikrotik': 'routeros.js', 'rsl': 'rsl.js', 'ruleslanguage': 'ruleslanguage.js', 'sas': 'sas.js', 'scala': 'scala.js', 'scheme': 'scheme.js', 'scilab': 'scilab.js', 'sci': 'scilab.js', 'smali': 'smali.js', 'smalltalk': 'smalltalk.js', 'st': 'smalltalk.js', 'sml': 'sml.js', 'sqf': 'sqf.js', 'stan': 'stan.js', 'stanfuncs': 'stan.js', 'stata': 'stata.js', 'do': 'stata.js', 'ado': 'stata.js', 'step21': 'step21.js', 'p21': 'step21.js', 'step': 'step21.js', 'stp': 'step21.js', 'stylus': 'stylus.js', 'styl': 'stylus.js', 'subunit': 'subunit.js', 'taggerscript': 'taggerscript.js', 'tap': 'tap.js', 'tcl': 'tcl.js', 'tk': 'tcl.js', 'thrift': 'thrift.js', 'tp': 'tp.js', 'twig': 'twig.js', 'craftcms': 'twig.js', 'vala': 'vala.js', 'vbscript-html': 'vbscript-html.js', 'vbscript': 'vbscript.js', 'vbs': 'vbscript.js', 'verilog': 'verilog.js', 'v': 'verilog.js', 'sv': 'verilog.js', 'svh': 'verilog.js', 'vhdl': 'vhdl.js', 'vim': 'vim.js', 'wasm': 'wasm.js', 'wren': 'wren.js', 'x86asm': 'x86asm.js', 'xl': 'xl.js', 'tao': 'xl.js', 'xquery': 'xquery.js', 'xpath': 'xquery.js', 'xq': 'xquery.js', 'zephir': 'zephir.js', 'zep': 'zephir.js'}
    """
    print(dependency)
    """
    {'clojure-repl.js': ['clojure.js'], 'julia-repl.js': ['julia.js'], 'vbscript-html.js': ['vbscript.js']}
    """
    # When every script has exactly one dependency, also print the flattened
    # form (script -> single dependency instead of script -> list).
    if all(len(deps) == 1 for deps in dependency.values()):
        short_deps: Dict[str, str] = {
            key: deps[0] for key, deps in dependency.items()
        }
        print(short_deps)
    print(f'{len(CDN_DEFAULTS)} default languages are in the prebuilt CDN.')
    """
    34 default languages are in the prebuilt CDN.
    """
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment