Created
September 18, 2013 19:18
-
-
Save JLarky/6614151 to your computer and use it in GitHub Desktop.
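html_entity_decode (named after the PHP function) for Erlang strings. For UTF-8 binaries: unicode:characters_to_binary(html_utils:html_entity_decode(unicode:characters_to_list(Text))). (binary_to_list/1 yields bytes rather than code points, so it is only correct for ASCII/Latin-1 binaries.)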
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(html_utils). | |
-export([html_entity_decode/1]). | |
%% -*- coding: utf-8 -*- | |
%% | |
%% %CopyrightBegin% | |
%% | |
%% Copyright Ericsson AB 2003-2012. All Rights Reserved. | |
%% | |
%% The contents of this file are subject to the Erlang Public License, | |
%% Version 1.1, (the "License"); you may not use this file except in | |
%% compliance with the License. You should have received a copy of the | |
%% Erlang Public License along with this software. If not, it can be | |
%% retrieved online at http://www.erlang.org/. | |
%% | |
%% Software distributed under the License is distributed on an "AS IS" | |
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See | |
%% the License for the specific language governing rights and limitations | |
%% under the License. | |
%% | |
%% %CopyrightEnd% | |
%% | |
%% based on xmerl_scan module | |
%% @doc Decodes the character and entity references found in `Input'.
%%
%% `Input' is a string (a list of character codes). The work is delegated to
%% scan_content/2, started with an empty accumulator; its result is returned
%% unchanged.
html_entity_decode(Input) ->
    scan_content(Input, []).
%% Walks the input one character at a time and returns the decoded string.
%%
%% `Acc' holds the output produced so far in reverse order; it is reversed
%% once when the input is exhausted. An "&" hands the remaining input to
%% scan_reference/1, which returns the decoded characters together with the
%% input left to scan. Every other character is copied through untouched.
scan_content([], Acc) ->
    lists:reverse(Acc);
scan_content([$& | Rest], Acc) ->
    {Decoded, Remaining} = scan_reference(Rest),
    %% `Acc' is reversed, so the decoded characters must be pushed reversed
    %% too. A plain `Decoded ++ Acc' only yields the right order while every
    %% reference decodes to exactly one character.
    scan_content(Remaining, lists:reverse(Decoded, Acc));
scan_content([Char | Rest], Acc) ->
    scan_content(Rest, [Char | Acc]).
%% Decodes the reference that follows an already consumed "&".
%%
%% Returns `{Chars, Rest}', where `Chars' is the decoded text and `Rest' is
%% the input still to be scanned. Numeric references may be decimal
%% ("#65;") or hexadecimal with either marker case ("#x41;", "#X41;").
%% Anything that does not start with "#" is handed to scan_entity_ref/1.
%%
%% A numeric reference that has no digits, contains a non-digit, lacks the
%% terminating ";", or names a code point rejected by xmerl_lib:is_char/1 is
%% echoed instead of crashing: the result is `{"&", Input}', which is the
%% same fallback scan_entity_ref/1 applies to names it does not know.
scan_reference([$#, Marker | T] = Ref) when Marker =:= $x; Marker =:= $X ->
    scan_char_ref_hex(T, Ref);
scan_reference([$# | T] = Ref) ->
    scan_char_ref_dec(T, Ref);
scan_reference(T) ->
    scan_entity_ref(T).

%% Decodes a decimal character reference. `T' is the input after "#" and
%% `Ref' is the whole input after "&", kept so it can be echoed on failure.
scan_char_ref_dec(T, Ref) ->
    decode_char_ref(lists:splitwith(fun is_dec_digit/1, T), 10, Ref).

%% Decodes a hexadecimal character reference. `T' is the input after "#x"
%% or "#X" and `Ref' is the whole input after "&".
scan_char_ref_hex(T, Ref) ->
    decode_char_ref(lists:splitwith(fun is_hex_digit/1, T), 16, Ref).

%% Turns the `{Digits, Tail}' split into a decoded character. The reference
%% is accepted only when at least one digit is present, the digits are
%% followed by ";", and the resulting code point is a legal character.
decode_char_ref({[_ | _] = Digits, [$; | Rest]}, Base, Ref) ->
    Code = list_to_integer(Digits, Base),
    case xmerl_lib:is_char(Code) of
        true -> {[Code], Rest};
        false -> {"&", Ref}
    end;
decode_char_ref(_Split, _Base, Ref) ->
    {"&", Ref}.

%% True for the characters 0-9.
is_dec_digit(C) ->
    C >= $0 andalso C =< $9.

%% True for the characters 0-9, a-f and A-F.
is_hex_digit(C) ->
    is_dec_digit(C)
        orelse (C >= $a andalso C =< $f)
        orelse (C >= $A andalso C =< $F).
%% Decodes a named entity reference. The argument is the input that follows
%% an already consumed "&".
%%
%% Returns `{Chars, Rest}': the replacement text and the input after the
%% terminating ";". Only amp, lt, gt, apos and quot are recognised. For any
%% other input the "&" itself is returned and nothing further is consumed,
%% so the unknown reference passes through unchanged.
scan_entity_ref([$a, $m, $p, $; | Rest]) ->
    {[$&], Rest};
scan_entity_ref([$l, $t, $; | Rest]) ->
    {[$<], Rest};
scan_entity_ref([$g, $t, $; | Rest]) ->
    {[$>], Rest};
scan_entity_ref([$a, $p, $o, $s, $; | Rest]) ->
    {[$'], Rest};
scan_entity_ref([$q, $u, $o, $t, $; | Rest]) ->
    {[$"], Rest};
scan_entity_ref(Unrecognised) ->
    %% Echo: emit the ampersand and let the caller rescan what follows.
    {[$&], Unrecognised}.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment