Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save davidhcefx/74950e34dfb8a5cea38bf0212c144769 to your computer and use it in GitHub Desktop.
Save davidhcefx/74950e34dfb8a5cea38bf0212c144769 to your computer and use it in GitHub Desktop.
Generate mappings from a language to its corresponding script name and dependencies for highlight.js
"""
Generate mappings from a language to its corresponding script name and
dependencies for highlight.js. Tool written by davidhcefx, 2020.8.24.
In highlight.js, code is highlighted via <pre><code class="language"></code></pre>.
But which JS script file corresponds to a given language name or alias?
And which other scripts, if any, must be loaded first in order to use that language?
This tool builds those relationships by parsing the highlight.js source files.
"""
from typing import List, Dict
import re
from os import listdir, path
import requests
# Script names bundled in the pre-built CDN package
# (https://highlightjs.org/download/). Starts empty; is_in_default() fills it
# on first use.
CDN_DEFAULTS: List[str] = []
# Directory with the per-language definition files inside a local
# highlight.js checkout.
LANG = './highlight.js/src/languages/'
def is_in_default(fname: str) -> bool:
    """Tell whether a script belongs to the pre-built CDN bundle.

    On the first call (while ``CDN_DEFAULTS`` is still empty) the highlight.js
    download page is fetched, and every ``name="..."`` attribute found in the
    first ``<ul>`` after ``id="download-form"`` is cached in ``CDN_DEFAULTS``.
    Later calls only consult that cache.

    Args:
        fname: Script file name to look up, e.g. ``'clojure.js'``.

    Returns:
        True if ``fname`` is one of the cached default names, else False.

    Raises:
        RuntimeError: If the download page answers with an error status, the
            expected section is missing, or no names could be extracted.
            Network-level exceptions raised by ``requests`` propagate as-is.
    """
    if not CDN_DEFAULTS:
        common_pat = re.compile(r'id="download-form".+?<ul>(.+?)</ul>', re.DOTALL)
        name_pat = re.compile(r'name="([^"]+)"')
        r = requests.get('https://highlightjs.org/download/', timeout=5)
        # Explicit raises rather than asserts: asserts are stripped under
        # `python -O`, which would turn a bad response into an obscure
        # AttributeError on `None` further down.
        if not r.ok:
            raise RuntimeError(f'Request failed (HTTP {r.status_code})')
        match = common_pat.search(r.text)
        if match is None:
            raise RuntimeError('Cannot find "Common" section in the HTML')
        # Collect into a local list first so a failure never leaves the
        # module-level cache half-filled.
        names: List[str] = []
        for li in match.group(1).split('<li>'):
            m = name_pat.search(li)
            if m:
                names.append(m.group(1))
        # An empty result would keep the cache empty and trigger a new HTTP
        # request on every call, while silently treating every language as
        # non-default.
        if not names:
            raise RuntimeError('No language names found in the "Common" section')
        CDN_DEFAULTS.extend(names)
    return fname in CDN_DEFAULTS
def main() -> None:
    """Scan the language sources and print the generated mappings.

    For every regular file in ``LANG`` that is not part of the default CDN
    bundle, this records:

    * ``libname``: language name (file name without ``.js``) and each alias
      from the file's ``aliases: [...]`` array, mapped to the script file
      name. If two files declare the same alias, the file processed later
      overwrites the earlier entry.
    * ``dependency``: script file name mapped to the entries of its
      ``Requires:`` line that are not themselves in the default bundle.

    Both mappings are printed, followed by a flattened one-dependency-per-
    script mapping (only when every script has exactly one dependency) and the
    number of default languages.

    Raises:
        AssertionError: If a file does not end in ``.js``, or if it contains
            ``aliases:`` / ``Requires:`` text that the corresponding regex
            fails to match (a self-check of the parser against the sources).
    """
    # Local import keeps this block self-contained. literal_eval only accepts
    # Python literals, so the alias array taken from a source file is parsed
    # without executing anything (the previous eval() would run arbitrary code).
    from ast import literal_eval

    libname: Dict[str, str] = {}  # libname for non-default languages
    dependency: Dict[str, List[str]] = {}  # non-default dependencies
    alias_pat = re.compile(r'\baliases:\s*(\[\s*(("[^"]+"|\'[^\']+\'),?\s*)+\])', re.DOTALL)
    require_pat = re.compile(r'\bRequires:\s*(.+?)\n', re.DOTALL)

    # listdir() order is arbitrary; sorting makes the output order and the
    # winner of any alias collision reproducible across machines.
    for fname in sorted(listdir(LANG)):
        fpath = path.join(LANG, fname)
        # don't generate mappings for default languages
        if is_in_default(fname) or not path.isfile(fpath):
            continue
        assert fname.endswith('.js'), f'Error: {fname}'
        libname[fname[:-3]] = fname  # adds the original name

        # Close the handle deterministically and decode as UTF-8 regardless
        # of the platform's locale encoding.
        with open(fpath, encoding='utf-8') as src:
            data = src.read()

        # search for aliases
        match = alias_pat.search(data)
        assert ('aliases:' in data) == (match is not None), f'Error: {fname}'
        if match:
            for alias in literal_eval(match.group(1)):
                libname[alias] = fname

        # search for non-default dependencies
        match = require_pat.search(data)
        assert ('Requires:' in data) == (match is not None), f'Error: {fname}'
        if match:
            dep_list = [name
                        for name in map(str.strip, match.group(1).split(','))
                        if not is_in_default(name)]
            if dep_list:
                dependency[fname] = dep_list

    print(libname)
    """
    {'1c': '1c.js', 'abnf': 'abnf.js', 'accesslog': 'accesslog.js', 'actionscript': 'actionscript.js', 'as': 'actionscript.js', 'ada': 'ada.js', 'angelscript': 'angelscript.js', 'asc': 'angelscript.js', 'apache': 'apache.js', 'apacheconf': 'apache.js', 'applescript': 'applescript.js', 'osascript': 'applescript.js', 'arcade': 'arcade.js', 'arduino': 'arduino.js', 'armasm': 'armasm.js', 'arm': 'armasm.js', 'asciidoc': 'asciidoc.js', 'adoc': 'asciidoc.js', 'aspectj': 'aspectj.js', 'autohotkey': 'autohotkey.js', 'ahk': 'autohotkey.js', 'autoit': 'autoit.js', 'avrasm': 'avrasm.js', 'awk': 'awk.js', 'axapta': 'axapta.js', 'x++': 'axapta.js', 'basic': 'basic.js', 'bnf': 'bnf.js', 'brainfuck': 'brainfuck.js', 'bf': 'brainfuck.js', 'cal': 'cal.js', 'capnproto': 'capnproto.js', 'capnp': 'capnproto.js', 'ceylon': 'ceylon.js', 'clean': 'clean.js', 'icl': 'clean.js', 'dcl': 'clean.js', 'clojure-repl': 'clojure-repl.js', 'clojure': 'clojure.js', 'clj': 'clojure.js', 'edn': 'clojure.js', 'cmake': 'cmake.js', 'cmake.in': 'cmake.js', 'coffeescript': 'coffeescript.js', 'coffee': 'coffeescript.js', 'cson': 'coffeescript.js', 'iced': 'coffeescript.js', 'coq': 'coq.js', 'cos': 'cos.js', 'cls': 'cos.js', 'crmsh': 'crmsh.js', 'crm': 'crmsh.js', 'pcmk': 'crmsh.js', 'crystal': 'crystal.js', 'cr': 'crystal.js', 'csp': 'csp.js', 'd': 'd.js', 'dart': 'dart.js', 'delphi': 'delphi.js', 'dpr': 'delphi.js', 'dfm': 'delphi.js', 'pas': 'delphi.js', 'pascal': 'delphi.js', 'django': 'django.js', 'jinja': 'django.js', 'dns': 'dns.js', 'bind': 'dns.js', 'zone': 'dns.js', 'dockerfile': 'dockerfile.js', 'docker': 'dockerfile.js', 'dos': 'dos.js', 'bat': 'dos.js', 'cmd': 'dos.js', 'dsconfig': 'dsconfig.js', 'dts': 'dts.js', 'dust': 'dust.js', 'dst': 'dust.js', 'ebnf': 'ebnf.js', 'elixir': 'elixir.js', 'ex': 'elixir.js', 'exs': 'elixir.js', 'elm': 'elm.js', 'erb': 'erb.js', 'erlang-repl': 'erlang-repl.js', 'erlang': 'erlang.js', 'erl': 'erlang.js', 'excel': 'excel.js', 'xlsx': 'excel.js', 'xls': 'excel.js', 
    'fix': 'fix.js', 'flix': 'flix.js', 'fortran': 'fortran.js', 'f90': 'fortran.js', 'f95': 'fortran.js', 'fsharp': 'fsharp.js', 'fs': 'fsharp.js', 'f#': 'fsharp.js', 'gams': 'gams.js', 'gms': 'gams.js', 'gauss': 'gauss.js', 'gss': 'gauss.js', 'gcode': 'gcode.js', 'nc': 'gcode.js', 'gherkin': 'gherkin.js', 'feature': 'gherkin.js', 'glsl': 'glsl.js', 'gml': 'gml.js', 'golo': 'golo.js', 'gradle': 'gradle.js', 'graphql': 'graphql.js', 'gql': 'graphql.js', 'groovy': 'groovy.js', 'haml': 'haml.js', 'handlebars': 'handlebars.js', 'hbs': 'handlebars.js', 'html.hbs': 'handlebars.js', 'html.handlebars': 'handlebars.js', 'htmlbars': 'handlebars.js', 'haskell': 'haskell.js', 'hs': 'haskell.js', 'haxe': 'haxe.js', 'hx': 'haxe.js', 'hsp': 'hsp.js', 'http': 'http.js', 'https': 'http.js', 'hy': 'hy.js', 'hylang': 'hy.js', 'inform7': 'inform7.js', 'i7': 'inform7.js', 'irpf90': 'irpf90.js', 'isbl': 'isbl.js', 'jboss-cli': 'jboss-cli.js', 'wildfly-cli': 'jboss-cli.js', 'julia-repl': 'julia-repl.js', 'jldoctest': 'julia-repl.js', 'julia': 'julia.js', 'lasso': 'lasso.js', 'ls': 'livescript.js', 'lassoscript': 'lasso.js', 'latex': 'latex.js', 'tex': 'latex.js', 'ldif': 'ldif.js', 'leaf': 'leaf.js', 'lisp': 'lisp.js', 'livecodeserver': 'livecodeserver.js', 'livescript': 'livescript.js', 'llvm': 'llvm.js', 'lsl': 'lsl.js', 'mathematica': 'mathematica.js', 'mma': 'mathematica.js', 'wl': 'mathematica.js', 'matlab': 'matlab.js', 'maxima': 'maxima.js', 'mel': 'mel.js', 'mercury': 'mercury.js', 'm': 'mercury.js', 'moo': 'mercury.js', 'mipsasm': 'mipsasm.js', 'mips': 'mipsasm.js', 'mizar': 'mizar.js', 'mojolicious': 'mojolicious.js', 'monkey': 'monkey.js', 'moonscript': 'moonscript.js', 'moon': 'moonscript.js', 'n1ql': 'n1ql.js', 'nestedtext': 'nestedtext.js', 'nt': 'nestedtext.js', 'nginx': 'nginx.js', 'nginxconf': 'nginx.js', 'nim': 'nim.js', 'nix': 'nix.js', 'nixos': 'nix.js', 'node-repl': 'node-repl.js', 'nsis': 'nsis.js', 'ocaml': 'ocaml.js', 'ml': 'sml.js', 'openscad': 'openscad.js', 
    'scad': 'openscad.js', 'oxygene': 'oxygene.js', 'parser3': 'parser3.js', 'pf': 'pf.js', 'pf.conf': 'pf.js', 'pgsql': 'pgsql.js', 'postgres': 'pgsql.js', 'postgresql': 'pgsql.js', 'pony': 'pony.js', 'powershell': 'powershell.js', 'pwsh': 'powershell.js', 'ps': 'powershell.js', 'ps1': 'powershell.js', 'processing': 'processing.js', 'pde': 'processing.js', 'profile': 'profile.js', 'prolog': 'prolog.js', 'properties': 'properties.js', 'protobuf': 'protobuf.js', 'puppet': 'puppet.js', 'pp': 'puppet.js', 'purebasic': 'purebasic.js', 'pb': 'purebasic.js', 'pbi': 'purebasic.js', 'q': 'q.js', 'k': 'q.js', 'kdb': 'q.js', 'qml': 'qml.js', 'qt': 'qml.js', 'reasonml': 'reasonml.js', 're': 'reasonml.js', 'rib': 'rib.js', 'roboconf': 'roboconf.js', 'graph': 'roboconf.js', 'instances': 'roboconf.js', 'routeros': 'routeros.js', 'mikrotik': 'routeros.js', 'rsl': 'rsl.js', 'ruleslanguage': 'ruleslanguage.js', 'sas': 'sas.js', 'scala': 'scala.js', 'scheme': 'scheme.js', 'scilab': 'scilab.js', 'sci': 'scilab.js', 'smali': 'smali.js', 'smalltalk': 'smalltalk.js', 'st': 'smalltalk.js', 'sml': 'sml.js', 'sqf': 'sqf.js', 'stan': 'stan.js', 'stanfuncs': 'stan.js', 'stata': 'stata.js', 'do': 'stata.js', 'ado': 'stata.js', 'step21': 'step21.js', 'p21': 'step21.js', 'step': 'step21.js', 'stp': 'step21.js', 'stylus': 'stylus.js', 'styl': 'stylus.js', 'subunit': 'subunit.js', 'taggerscript': 'taggerscript.js', 'tap': 'tap.js', 'tcl': 'tcl.js', 'tk': 'tcl.js', 'thrift': 'thrift.js', 'tp': 'tp.js', 'twig': 'twig.js', 'craftcms': 'twig.js', 'vala': 'vala.js', 'vbscript-html': 'vbscript-html.js', 'vbscript': 'vbscript.js', 'vbs': 'vbscript.js', 'verilog': 'verilog.js', 'v': 'verilog.js', 'sv': 'verilog.js', 'svh': 'verilog.js', 'vhdl': 'vhdl.js', 'vim': 'vim.js', 'wasm': 'wasm.js', 'wren': 'wren.js', 'x86asm': 'x86asm.js', 'xl': 'xl.js', 'tao': 'xl.js', 'xquery': 'xquery.js', 'xpath': 'xquery.js', 'xq': 'xquery.js', 'zephir': 'zephir.js', 'zep': 'zephir.js'}
    """
    print(dependency)
    """
    {'clojure-repl.js': ['clojure.js'], 'julia-repl.js': ['julia.js'], 'vbscript-html.js': ['vbscript.js']}
    """
    # When every script has exactly one dependency, also print the flattened
    # script -> dependency form (an empty mapping qualifies as well).
    if all(len(deps) == 1 for deps in dependency.values()):
        short_deps: Dict[str, str] = {key: deps[0] for key, deps in dependency.items()}
        print(short_deps)
    print(f'{len(CDN_DEFAULTS)} default languages are in the prebuilt CDN.')
    """
    34 default languages are in the prebuilt CDN.
    """
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment