Last active
June 16, 2024 11:32
-
-
Save ArtemAvramenko/6a1ccb2d555cbff1ab555af2a96fdef2 to your computer and use it in GitHub Desktop.
JavaScript code to read Unicode text from a file in the browser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reads a file in the browser and decodes its bytes as Unicode text.
 *
 * The encoding is picked from the byte order mark (BOM) at the start of the
 * data: `FE FF` selects UTF-16BE, `FF FE` selects UTF-16LE, and anything else
 * is decoded as UTF-8. Trailing NUL characters are stripped from the decoded
 * text. The content is rejected when a NUL character remains inside the text,
 * or when the data has no BOM at all and the decoded text contains U+FFFD
 * (the replacement character the decoder emits for malformed input).
 *
 * The returned promise is never rejected: every failure is reported by
 * resolving with an object that has an `error` message.
 *
 * @param {File} file - File to read; its `size` and `name` properties are used.
 * @param {number} [maxMegabytes=1] - Largest accepted size, in units of
 *   0x100000 bytes.
 * @returns {Promise<{text: string, filename: string} | {error: string}>}
 *   The decoded text together with the file name, or an error description.
 */
function readTextFromFile(file, maxMegabytes = 1) {
  return new Promise((resolve) => {
    // Validate the size first so that no reader is created for rejected input.
    if (!file || !file.size) {
      resolve({ error: 'The file cannot be empty' });
      return;
    }
    if (file.size > maxMegabytes * 0x100000) {
      resolve({ error: `The file size should not exceed ${maxMegabytes} MB` });
      return;
    }

    const filename = file.name;

    try {
      // Created inside the try block so that a failing constructor is
      // reported through the resolved result like every other failure.
      const reader = new FileReader();

      reader.onerror = () => {
        resolve({ error: 'Cannot read a file from a specified location' });
      };

      reader.onload = () => {
        try {
          const data = new Uint8Array(reader.result);

          // https://en.wikipedia.org/wiki/Byte_order_mark
          let encoding = 'utf-8';
          let checkMojibakes = false;
          if (data[0] === 0xFE && data[1] === 0xFF) {
            encoding = 'utf-16be';
          } else if (data[0] === 0xFF && data[1] === 0xFE) {
            encoding = 'utf-16le';
          } else {
            // Without a UTF-8 BOM the encoding is only a guess, so the
            // decoded text is additionally checked for replacement characters.
            checkMojibakes =
              data[0] !== 0xEF || data[1] !== 0xBB || data[2] !== 0xBF;
          }

          // Decode, then drop NUL terminators at the end of the text.
          const text = new TextDecoder(encoding)
            .decode(data)
            .replace(/\0+$/, '');

          // A NUL left inside the text, or a replacement character in
          // BOM-less data, means the bytes were not valid text in the
          // selected encoding.
          const hasNull = text.includes('\0');
          const hasMojibake = checkMojibakes && text.includes('\uFFFD');
          if (hasNull || hasMojibake) {
            resolve({ error: 'The file must use UTF-8 or UTF-16 encoding' });
          } else {
            resolve({ text, filename });
          }
        } catch (err) {
          resolve({ error: err.toString() });
        }
      };

      reader.readAsArrayBuffer(file);
    } catch (err) {
      resolve({ error: err.toString() });
    }
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment