Last active
May 9, 2018 13:43
-
-
Save enniosousa/7e8b451041ded6a0d51fd60992639296 to your computer and use it in GitHub Desktop.
Strip HTML tags, remove attributes and empty tags
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Strip HTML tags, remove attributes and remove empty tags.
 *
 * The work is done in three passes:
 *   1. every tag not listed in $preserve_tags is stripped with strip_tags();
 *   2. every attribute whose name matches an entry of $remove_attrs is removed;
 *   3. optionally, elements that contain nothing but whitespace are removed,
 *      repeating until nothing more can be removed (so nested empty elements
 *      disappear as well).
 *
 * NOTE: this is a regex-based clean-up helper, not a security sanitizer. For
 * untrusted input prefer a real HTML parser / purifier.
 *
 * @param string $html              HTML string to clean.
 * @param array  $preserve_tags     Tag names (without angle brackets) to keep.
 *                                  An empty array strips every tag.
 * @param array  $remove_attrs      Attribute names to remove. A `*` acts as a
 *                                  wildcard for one or more name characters,
 *                                  e.g. `data-*` or `on*`. Matching is
 *                                  case-insensitive.
 * @param bool   $remove_empty_tags Whether to remove elements without content.
 * @return string The cleaned HTML.
 */
function strip_html_content(
    string $html,
    array $preserve_tags = ['p', 'br', 'b', 'strong', 'i', 'em', 'ul', 'ol', 'li', 's', 'a'],
    array $remove_attrs = ['style', 'data-*', 'id', 'class', 'on*'],
    bool $remove_empty_tags = true
): string {
    // Always run strip_tags(): an empty whitelist means "strip everything",
    // and the result is guaranteed to be a string for the later passes.
    $allowed_tags = empty($preserve_tags) ? '' : '<' . implode('><', $preserve_tags) . '>';
    $return = strip_tags($html, $allowed_tags);

    // Turn every attribute name into a safe regex fragment; `*` becomes a
    // wildcard that also covers digits and dashes (e.g. data-foo-bar, data-x1).
    $name_patterns = [];
    foreach ($remove_attrs as $attr) {
        $attr = trim((string) $attr);
        if ($attr === '') {
            continue;
        }
        $name_patterns[] = str_replace('\*', '[a-z0-9_.:-]+', preg_quote($attr, '/'));
    }

    if (!empty($name_patterns)) {
        // Accept double-quoted, single-quoted and unquoted attribute values,
        // any whitespace before the name and around the equals sign.
        $regex_rm_attr = '/\s+(?:' . implode('|', $name_patterns) . ')'
            . '\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s"\'<>=`]+)/i';
        $without_attrs = preg_replace($regex_rm_attr, '', $return);
        if ($without_attrs !== null) { // null signals a PCRE error: keep the previous value
            $return = $without_attrs;
        }
    }

    // adapted from https://stackoverflow.com/a/29177760/4830771
    if ($remove_empty_tags) {
        // Treat a non-breaking space (entity or raw UTF-8 character) between
        // two tags as a plain space so the element counts as empty.
        $return = str_replace(['>&nbsp;<', ">\xC2\xA0<"], '> <', $return);

        // Removing an inner empty element can make its parent empty, so repeat
        // until the string stops changing.
        do {
            $tmp = $return;
            $cleaned = preg_replace('#<([^\s>]+)[^>]*>[[:space:]]*</\1>#i', '', $return);
            if ($cleaned === null) { // PCRE error: stop and keep what we have
                break;
            }
            $return = $cleaned;
        } while ($return !== $tmp);
    }

    return $return;
}
// Demo: clean a rich-text snippet copied from a web page and print the result.
// The JSON inside data-ft is written with &quot; entities; raw double quotes
// there would terminate the PHP string literal and cause a parse error.
$html = "<p onclick=\"alert('')\"><a class=\"_58cn\" data-ft=\"{&quot;tn&quot;:&quot;*N&quot;,&quot;type&quot;:104}\" href=\"https://www.facebook.com/hashtag/sol%C3%ADlovers\" style=\"color:#365899; cursor:pointer; text-decoration-line:none; font-family:Helvetica, Arial, sans-serif; font-size:14px; background-color:#ffffff\">#solílovers</a><span style=\"font-size:14px\"><span style=\"color:#1d2129\"><span style=\"font-family:Helvetica, Arial, sans-serif\"><span style=\"background-color:#ffffff\"> Amanhã temos descontos na nossa loja de calçados na coleção nova... Além do Super Bazar do Pátio!</span></span></span></span></p><p> </p><p> </p><p><span style=\"font-size:11px;\"><em>Pátio Buriti<br />Feira de Santana-Bahia<br />(75) 3021-0821</em></span></p><p> </p><p><span style=\"font-size:11px;\"><em data-test=\"aaaaaaa\"><a href=\"http://www.okad.com.br/soli\">www.okad.com.br/soli</a></em></span></p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p>";
echo strip_html_content($html);
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment