Skip to content

Instantly share code, notes, and snippets.

@MightyPork
Last active September 9, 2017 09:13
Show Gist options
  • Save MightyPork/692134588fa957a260a3469c59bdf869 to your computer and use it in GitHub Desktop.
Save MightyPork/692134588fa957a260a3469c59bdf869 to your computer and use it in GitHub Desktop.
Find unique non-ASCII characters in a UTF-8 file
let fs = require("fs");
// Command-line arguments after the node executable and the script path.
const myArgs = process.argv.slice(2);

/**
 * Collect the distinct non-ASCII characters of a string.
 *
 * Iterates by code point (not by UTF-16 code unit), so characters outside
 * the Basic Multilingual Plane are kept whole.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Each character whose code point is above 127, once,
 *   in order of first appearance.
 */
function findUniqueNonAscii(text) {
  // A Set gives O(1) membership checks and preserves insertion order.
  const seen = new Set();
  for (const ch of text) {
    if (ch.codePointAt(0) > 127) {
      seen.add(ch);
    }
  }
  return [...seen];
}

/**
 * Number of bytes a string occupies when encoded as UTF-8.
 *
 * @param {string} ch - Character (or any string) to measure.
 * @returns {number} UTF-8 encoded length in bytes.
 */
function utf8ByteLength(ch) {
  return Buffer.byteLength(ch, "utf8");
}

/**
 * Pick the ANSI SGR foreground colour code used to print a byte count.
 *
 * @param {number} byteCount - UTF-8 length of the character.
 * @returns {string} '32' (green) for 2 bytes, '33' (yellow) for 3 bytes,
 *   '31' (red) for anything else.
 */
function byteCountColor(byteCount) {
  if (byteCount === 2) {
    return "32";
  }
  if (byteCount === 3) {
    return "33";
  }
  return "31";
}

if (myArgs.length === 0) {
  // Without a path fs.readFile would throw an opaque argument TypeError.
  console.error("Usage: node <script> <file>");
  process.exitCode = 1;
} else {
  fs.readFile(myArgs[0], "utf8", (err, data) => {
    if (err) throw err;

    const table = findUniqueNonAscii(data);

    // One line per character: the character itself, then its UTF-8 size
    // coloured by how many bytes it needs.
    for (const k of table) {
      const bn = utf8ByteLength(k);
      console.log(`\u001b[97;1m${k}\u001b[0m \u001b[${byteCountColor(bn)}m(${bn} bytes)\u001b[0m`);
    }

    console.log(table.join(' '));
    console.log(`\u001b[97m${table.length} slots in look-up table required.\u001b[0m`);
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment