Created
October 6, 2008 23:27
-
-
Save astro/15176 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(codepoint). | |
-export([codepoint_to_utf8/1, test/0]). | |
%% See: http://en.wikipedia.org/wiki/Utf-8#Description | |
%% TODO: endianness
%% @doc Encode a single Unicode code point as a list of UTF-8 bytes.
%%
%% `CP' must be an integer Unicode scalar value: 0..16#10FFFF, excluding the
%% UTF-16 surrogate range 16#D800..16#DFFF, which must not be encoded in
%% UTF-8 (RFC 3629). The result is a list of one to four byte values.
%%
%% Any other argument (negative, non-integer, surrogate, or above 16#10FFFF)
%% matches no clause and fails with a `function_clause' error instead of
%% silently producing an invalid byte sequence.
-spec codepoint_to_utf8(0..16#10FFFF) -> [byte(), ...].
%% 1 byte: 0xxxxxxx
codepoint_to_utf8(CP) when is_integer(CP), CP >= 0, CP =< 16#7F ->
    [CP];
%% 2 bytes: 110xxxxx 10xxxxxx
codepoint_to_utf8(CP) when is_integer(CP), CP >= 16#80, CP =< 16#7FF ->
    [2#11000000 bor (CP bsr 6),
     2#10000000 bor (CP band 2#111111)];
%% 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (surrogates are rejected)
codepoint_to_utf8(CP) when is_integer(CP), CP >= 16#800, CP =< 16#FFFF,
                           (CP < 16#D800 orelse CP > 16#DFFF) ->
    [2#11100000 bor (CP bsr 12),
     2#10000000 bor ((CP bsr 6) band 2#111111),
     2#10000000 bor (CP band 2#111111)];
%% 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
codepoint_to_utf8(CP) when is_integer(CP), CP >= 16#10000, CP =< 16#10FFFF ->
    [2#11110000 bor (CP bsr 18),
     2#10000000 bor ((CP bsr 12) band 2#111111),
     2#10000000 bor ((CP bsr 6) band 2#111111),
     2#10000000 bor (CP band 2#111111)].
%% @doc Self-check for codepoint_to_utf8/1; returns `ok' or fails with
%% `badmatch' on the first mismatch.
%%
%% Expected values are written as explicit byte lists rather than non-ASCII
%% string literals, so the result does not depend on the encoding the
%% compiler assumes for this source file.
test() ->
    %% U+0020 SPACE (1 byte)
    [16#20] = codepoint_to_utf8(32),
    %% U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS (2 bytes)
    [16#C3, 16#8F] = codepoint_to_utf8(207),
    %% U+20AC EURO SIGN (3 bytes)
    [16#E2, 16#82, 16#AC] = codepoint_to_utf8(16#20AC),
    %% U+FFFD REPLACEMENT CHARACTER (3 bytes)
    [16#EF, 16#BF, 16#BD] = codepoint_to_utf8(16#FFFD),
    %% U+10447 (4 bytes)
    [16#F0, 16#90, 16#91, 16#87] = codepoint_to_utf8(66631),
    ok.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment