Last active
December 12, 2015 05:19
-
-
Save mpenkov/4721121 to your computer and use it in GitHub Desktop.
Character encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- | |
vim: shiftwidth=2 | |
--> | |
<html> | |
<head><title>Clobber</title></head> | |
<body> | |
<!-- Demo form: the read-only textarea accumulates the submitted lines, the
     text input supplies the next line, and the checkbox selects whether that
     line is clobbered through ASCII before it is appended (see onSubmit). -->
<form>
<textarea id="textArea" readonly rows="10" cols="70"></textarea><br/>
<input id="inputText" type="text" size="50" value="Enter some Unicode here...">
<!-- <input> is a void element and cannot wrap its caption; a <label> ties the
     caption to the checkbox so clicking the text toggles it. -->
<label><input id="checkbox" type="checkbox">clobber the submitted text</label>
<input type="button" value="Msg" onclick="onSubmit();">
</form>
<script> | |
// Click handler for the "Msg" button: moves the contents of the inputText
// field onto a new line of the textArea log, then empties the field.
// When the checkbox is ticked, the text is first round-tripped through
// toAscii()/toUtf16() to demonstrate lossy encoding.
function onSubmit() {
  var output = document.getElementById("textArea");
  var source = document.getElementById("inputText");
  var clobberToggle = document.getElementById("checkbox");

  // Entries are newline-separated; the very first entry needs no separator.
  if (output.value.length > 0) {
    output.value += "\n";
  }

  // Ticked: squeeze the string through a limited character set. Whatever that
  // character set cannot represent is lost for good. This is not specific to
  // ASCII: any encoding narrower than Unicode (KOI-8, for instance) loses
  // data in the same way.
  //
  // Unticked: the string stays in JavaScript's native UTF-16 representation,
  // so nothing is lost.
  var line = clobberToggle.checked
    ? toUtf16(toAscii(source.value))
    : source.value;

  output.value += line;
  source.value = "";
}
/**
 * Converts a JavaScript (UTF-16) string to an array of ASCII codes.
 *
 * The string is processed one UTF-16 code unit at a time. Any code unit
 * outside the 7-bit ASCII range (0-127) is replaced with the code for "?",
 * since it cannot be represented in ASCII. A character stored as a surrogate
 * pair therefore yields two "?" codes, so the result always has the same
 * length as the input string.
 *
 * @param {string} utf16 - The string to convert.
 * @returns {number[]} One ASCII code (0-127) per UTF-16 code unit.
 */
function toAscii(utf16) {
  var MAX_ASCII = 127; // ASCII is a 7-bit set; 128-255 is already outside it.
  var REPLACEMENT = "?".charCodeAt(0);
  var ascii = [];
  for (var i = 0; i < utf16.length; ++i) {
    var code = utf16.charCodeAt(i);
    ascii[i] = code > MAX_ASCII ? REPLACEMENT : code;
  }
  return ascii;
}
/**
 * Converts an array of character codes (such as the ASCII codes produced by
 * toAscii) back into a normal JavaScript string.
 *
 * Each array element becomes one character via String.fromCharCode.
 *
 * @param {number[]} ascii - The character codes to convert.
 * @returns {string} The string made of the corresponding characters; an
 *     empty array yields the empty string.
 */
function toUtf16(ascii) {
  var chars = [];
  for (var i = 0; i < ascii.length; ++i) {
    chars[i] = String.fromCharCode(ascii[i]);
  }
  return chars.join("");
}
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment