Last active
May 30, 2017 08:14
-
-
Save adriengibrat/817140a89cfd4893b4155a2ac913904d to your computer and use it in GitHub Desktop.
simple CLDR plural rules parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/** | |
* plural.js – simple CLDR plural rules parser | |
* https://gist.github.com/adriengibrat/817140a89cfd4893b4155a2ac913904d | |
* | |
* This program is free software. It comes without any warranty. | |
* Released under the WTFPL license – http://www.wtfpl.net | |
* | |
* Usage: | |
# default amd & global names are 'plurals' / 'ordinals', depending on the data provided
# exports all languages by default | |
./plural.js [amd & global name] [languages subset] < input.json > output.js | |
# when no input provided, outputs parser source | |
# default amd & global name is 'cldr' | |
./plural.js [amd & global name] > parser.js | |
* Examples: | |
* 0. get CLDR data
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/plurals.json > /tmp/plurals.json | |
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/ordinals.json > /tmp/ordinals.json | |
* 1. parse data to js | |
./plural.js < /tmp/plurals.json > plurals.js | |
./plural.js < /tmp/ordinals.json > ordinals.js | |
* 1.1 with custom amd & global name | |
./plural.js myplurals < /tmp/plurals.json > myplurals.js | |
./plural.js myordinals < /tmp/ordinals.json > myordinals.js | |
* 1.2 select exported language(s) | |
./plural.js pluralsUsa en es < /tmp/plurals.json > plurals-usa.js | |
./plural.js ordinalsUsa en es < /tmp/ordinals.json > ordinals-usa.js | |
* 2. get rule parser source | |
./plural.js > cldr.js | |
* 2.1 with custom amd & global name | |
./plural.js mycldr > mycldr.js | |
* 3. optionally, install uglifyjs | |
npm i -g uglify-js | |
* 3.1 pipe the output | |
./plural.js < /tmp/plurals.json | uglifyjs --compress --mangle - > plurals-all.js | |
* 3.2 make daddy proud, write crazy one liners | |
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/plurals.json | ./plural.js | uglifyjs --compress --mangle - > plurals.js | |
*/ | |
// Command line entry point: reads CLDR JSON from stdin and prints the compiled
// plural functions, or prints the bare parser source when stdin is a terminal.
var stdin = process.stdin
var stdout = process.stdout
var stderr = process.stderr
var args = process.argv.slice(2) // [amd & global name] [languages subset]
var input = ''

stdin.setEncoding('utf8')
stdin.on('data', function (chunk) { input += chunk }) // buffer the whole input
stdin.on('end', function () {
	// report a problem on stderr and flag the run as failed for the calling shell
	function fail (message) {
		process.exitCode = 1
		console.error(message)
	}
	try { // parse input
		var data = JSON.parse(input)
	}
	catch (error) { return fail('invalid JSON data\n') }
	try { // find rules
		var cardinal = data.supplemental['plurals-type-cardinal']
		var ordinal = data.supplemental['plurals-type-ordinal']
		var rules = cardinal || ordinal
		if (!rules)
			return fail('no cardinal or ordinal rules found\n')
	}
	catch (error) { return fail('input must be CLDR data\n') }
	try { // compile and write out
		// shift() removes the optional name from the front of args, so only the
		// language codes are left for the subset filter below
		// (unshift() would return the array length and remove nothing)
		var name = args.shift() || cardinal && 'plurals' || ordinal && 'ordinals'
		var subset = args.length ? pick.bind(null, args) : identity
		var compile = langs(cldr())
		var plurals = compile(subset(rules))
		console.log(clean(source(name, plurals.factory())))
	}
	catch (error) { return fail(error.message) }
})
if (stdin.isTTY) { // outputs cldr source when no stdin
	var name = args.shift() || 'cldr'
	console.log(source(name, name === 'gettext' ? gettext : cldr))
	process.exit()
}
// Factories | |
function cldr () { // factory of the parser compiling one language's CLDR plural ruleset
	// rank of each plural category: rules are emitted in this order
	var TYPE_RANK = {zero: 0, one: 1, two: 2, few: 3, many: 4, other: 5}
	// operands the generated code may declare; each one only relies on those listed before it
	var OPERANDS = [
		'b = (n + ".").split(".")' // array, integer digits & fractional digits in n
		, 'f = b[1]' // string, fractional digits in n
		, 'i = b[0]' // string, integer digits of n
		, 'j = Number(i) == n' // boolean, n is an integer
		, 't = f.replace(/0+$/, "")' // string, fractional digits in n without trailing zeros
		, 'v = f.length' // integer, number of fraction digits in n
	]

	// Vars keeps track of the variables the generated source code refers to
	function Vars (operands, sort) {
		this.declarations = {} // variable name -> declaration source
		this.lookups = (operands || []).map(lookup).reverse() // dependents are looked up before their dependencies
		this.sort = sort // comparator ordering variable names in the declarations block
	}
	// renders the 'var ...' declarations block, empty string when nothing is declared
	Vars.prototype.toString = function toString () {
		var declared = this.declarations
		var lines = Object.keys(declared)
			.sort(this.sort || function () { return 0 })
			.map(function (name) { return declared[name] })
		if (!lines.length)
			return ''
		return 'var ' + lines.join('\n\t, ') + '\n'
	}
	// registers every known operand found in the given source, returns the Vars itself
	Vars.prototype.parse = function parse (source) {
		for (var index = 0; index < this.lookups.length; index++)
			this.lookups[index].call(this, source)
		return this
	}

	// the parser: takes a ruleset ({'pluralRule-count-<type>': rule}), returns the plural function
	return function cldr (ruleset) {
		var vars = new Vars(OPERANDS, byLengthThenName)
		var rules = Object.keys(ruleset)
			.map(function (key) { return rule(vars, ruleset[key], key) })
			.sort(byType)
		return compile(rules, vars)
	}

	// Comparators
	function byLengthThenName (a, b) { // shortest variable names first, then alphabetically
		return a.length - b.length || (a < b ? -1 : 1)
	}
	function byType (a, b) { // rule definitions, by plural category rank
		return TYPE_RANK[a.type] < TYPE_RANK[b.type] ? -1 : 1
	}

	// Vars helper
	function lookup (operand) { // create the function that declares operand when a source uses it
		var name = operand.match(/^\w+/)[0]
		var pattern = new RegExp('\\b' + name + '\\b')
		return function (source) {
			// declarations made so far are searched too, as they may depend on this operand
			if (pattern.test(String(this) + source))
				this.declarations[name] = operand
		}
	}

	// Plural compile helpers
	function rule (vars, rule, name) { // build the definition object of one rule
		var parts = rule.trim().split(/\s*@\w*/) // condition first, then the sample lists
		var condition = parse(vars, parts[0])
		var samples = parts.slice(1).join(' ')
		var type = name.replace('pluralRule-count-', '')
		var statement = 'return "' + type + '"'
		if (condition)
			statement = 'if (' + condition + ')\n\t' + statement
		return {
			source: statement
			, type: type
			, test: samples.split(/[ ,~…]+/).filter(Boolean)
		}
	}
	function parse (vars, source) { // convert a plural rule condition to js code
		var AND = ' && '
		var OR = ' || '
		var EQ = ' == '
		var INT = 'j && ' // guard: n operand comparisons only hold for integers
		// conversion shamelessly borrowed from https://github.com/eemeli/make-plural.js
		var js = source.replace(/([fin]) % (\d+)/g, function (_, operand, divisor) { // modulos
			var name = operand + divisor
			var dividend = operand === 'n' ? 'i' : operand
			vars.declarations[name] = name + ' = ' + dividend + ' % ' + divisor
			return (operand === 'n' ? INT : '') + name
		})
		js = js.replace(/(\w+ (!?)= )([0-9.]+,[0-9.,]+)/g, function (_, expr, not, list) { // lists
			var checks = list.split(',').map(function (item) { return expr + item })
			return '(' + checks.join(not ? AND : OR) + ')'
		})
		js = js.replace(/(\w+) (!?)= ([0-9]+)\.\.([0-9]+)/g, function (_, operand, not, from, to) { // ranges
			if (not)
				return '(' + operand + ' < ' + from + OR + operand + ' > ' + to + ')'
			return (operand === 'n' ? INT : '') + operand + ' >= ' + from + AND + operand + ' <= ' + to
		})
		return js
			.replace(/ and /g, AND)
			.replace(/ or /g, OR)
			.replace(/ = /g, EQ)
	}
	function compile (rules, vars) { // compile the plural function, returned only if its tests run OK
		var body = rules.map(function (rule) { return rule.source }).join('\n')
		var declarations = vars.parse(body).toString()
		var fn = new Function('n', '\t' + (declarations + body).replace(/\n/g, '\n\t'))
		fn.types = rules.map(function (rule) { return rule.type })
		rules.forEach(function (rule) { test(fn, rule.type, rule.test) })
		return fn
	}
	function test (fn, expected, values) { // throws when fn does not return expected for one of the values
		for (var index = 0; index < values.length; index++) {
			var n = values[index]
			var result = fn(n)
			if (result != expected)
				throw Error('n = ' + n + ' -> ' + result + ', expected ' + expected)
		}
	}
}
// Batch rules parser factory.
// compile: function turning one language ruleset into a plural function carrying a `types` array.
// Returns a function taking a {lang: ruleset} dictionary and returning a {lang: plural function}
// hash, whose prototype provides factory(): a Function that rebuilds the same hash, sharing
// identical plural functions and types lists between languages.
function langs (compile) { // langs batch rules parser
	return function langs (dictionary) {
		// factory lives on the prototype so it is not listed among the languages
		return reduce(build, Object.create({factory: factory}), dictionary)
	}
	// Utils
	function reduce (reducer, initial, object) { // reduce object, fp style
		return Object.keys(object)
			.reduce(function (acc, key) { return reducer(acc, object[key], key) } , initial)
	}
	function variable (index) { // generate variable names: 'a', 'b', ..., 'z', 'a1', 'b1', ..., 'z1', 'a2', etc.
		return String.fromCharCode(index % 26 + 97)+ (index / 26 | 0 || '')
	}
	function indent (source) { return String(source).replace(/\n/g, '\n\t') } // indent code
	// Langs parser helpers
	function build (langs, rules, lang) { // build langs plural hash
		try { langs[lang] = compile(rules) }
		catch (error) { throw Error('compile ' + lang + ' plural failed (' + error.message + ')') }
		return langs
	}
	function factory () { // compile factory of langs plural hash
		var dedupes = reduce(dedupe, {fns: {}, types: {}}, this) // this: the langs plural hash
		var build = source.bind(dedupes.types)
		var sources = reduce(build, {refs: [], types: [], props: []}, dedupes.fns)
		var LF = '\n', LFC = LF + ', '
		return new Function('', indent(['\tvar ' + indent(sources.refs.join(LFC))
			, 'function types (fn, types) { fn.types = types.slice() }'
			, sources.types.join(LF)
			, 'return {' + indent(LF + sources.props.join(LFC))
			, '}'].join(LF)))
	}
	function dedupe (dedupe, fn, lang) { // dedupe plural fn definitions and types
		var fns = dedupe.fns
		// keys are the function source / the joined types list, so identical ones collapse
		fns[fn] = { langs: fns[fn] ? fns[fn].langs.concat(lang) : [lang], fn: fn }
		dedupe.types[fn.types] = { list: fn.types }
		return dedupe
	}
	function source (source, dedupe) { // build source parts from deduped fn definitions and types
		var types = this[dedupe.fn.types] // this: the deduped types hash
		var name = variable(source.refs.length)
		source.refs.push(name + ' = ' + dedupe.fn)
		if (!types.name) { // a types list is declared once, the first time it is needed
			types.name = variable(source.refs.length)
			source.refs.push(types.name + ' = ' + JSON.stringify(types.list))
		}
		source.types.push('types(' + name + ', ' + types.name + ')')
		// JSON.stringify escapes quotes & backslashes, so any key yields a valid property name
		dedupe.langs.forEach(function (lang) { source.props.push(JSON.stringify(lang) + ': ' + name) })
		return source
	}
}
// Utils | |
// Returns a new object holding only the own enumerable properties of `object`
// whose key is listed in `keys` (properties keep the order they have in `object`).
function pick (keys, object) {
	var picked = {}
	Object.keys(object).forEach(function (key) {
		if (keys.indexOf(key) === -1)
			return
		picked[key] = object[key]
	})
	return picked
}
// Pass-through: hands back its argument untouched (used when no language subset is requested).
function identity (a) {
	return a
}
// Source format helpers | |
// Cleans up source code generated by `new Function`:
// - drops the automatic `anonymous` function name (and only that exact name: the word
//   boundary keeps identifiers that merely start with "anonymous" intact)
// - drops the empty `/**/` comment, and the whitespace before it, that some engines
//   insert in the parameter list
// Returns the cleaned source as a string.
function clean (source) { // cleanup source code generated by new Function
	return String(source)
		.replace(/(\bfunction )anonymous\b/g, '$1')
		.replace(/\s*\/\*\*\//g, '')
}
// Small UMD loader: publishes the module built by `factory` under `name`.
// - AMD: registers a named module; the factory itself is handed to the loader (with no
//   dependencies). Passing the built value instead would be wrong whenever that value is
//   a function, since AMD loaders call a function argument as the module factory.
// - CommonJS: assigns the built module to module.exports
// - otherwise: assigns the built module to root[name]
// NOTE: this function is stringified into the generated output, keep it ES5 & self-contained.
function umd (root, name, factory) { // small UMD loader
	if (typeof define === 'function' && define.amd) {
		define(name, [], factory)
	} else if (typeof exports === 'object') {
		module.exports = factory()
	} else {
		root[name] = factory()
	}
}
// Formats a standalone script: the UMD loader source, immediately invoked with the global
// `this`, the module name and the factory source.
// name: amd & global name of the module; emitted as an escaped string literal, so quotes
//       or backslashes in it cannot break (or inject code into) the generated script
// factory: function whose source is embedded, it builds the module when the script runs
function source (name, factory) { // format source with UMD loader
	return '('+ umd + ')(this, ' + JSON.stringify(String(name)) + ', ' + factory + ');'
}
// Easter egg ;) | |
// Factory of a gettext "Plural-Forms" parser, e.g. 'nplurals=2; plural=(n != 1);'.
// The returned parser takes such a header value and returns a function (n) -> plural index.
// It throws when the header cannot be parsed or when the plural expression contains
// anything but `n`, digits, whitespace and the operators ! = < > ( ) % | & ? :
// The generated function throws when the computed index is not within [0, nplurals).
// NOTE: this function is stringified into the generated output, keep it ES5 & self-contained.
function gettext () {
	return function gettext (rule) {
		// the expression ends at the first semicolon, so a trailing ';' or a following
		// field is never mistaken for a part of it
		var expr = parse(/\bplural\s*=\s*([^;]+)/, rule)
		var n = parse(/\bnplurals\s*=\s*(\d+)\b/, rule)
		if (/[^n!=<>()%|&?:\s\d]/.test(expr))
			throw Error('unsafe char in plural expression: ' + expr)
		return new Function('n', '\t' + [
			// Number() maps the booleans of expressions like (n != 1) to 0 / 1, as C does
			// (parseInt would turn them into NaN)
			'var plural = Number(' + expr + ')'
			// written as a negation so that NaN is rejected too
			, 'if (!(plural >= 0 && plural < ' + n + '))'
			, '\tthrow Error("invalid plural: " + plural)'
			, 'return plural'].join('\n\t')
		)
	}
	// Parse helper: returns the last captured group of pattern in string, throws if no match
	function parse (pattern, string) {
		try { return pattern.exec(string).pop() }
		catch (e) { throw Error('unable to parse: ' + string) }
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment