Created
November 15, 2014 04:40
-
-
Save mazhar266/d9a5524f8e66f8cfd525 to your computer and use it in GitHub Desktop.
PHP's handy strip_tags() function removes HTML tags that look like <word...>, <word.../>, or </word>. However, it doesn't understand the tags it's removing, so we fixed it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Remove HTML tags, including invisible text such as style and
 * script code, and embedded objects. Add line breaks around
 * block-level tags to prevent word joining after tag removal.
 *
 * The patterns are applied in UTF-8 mode first. If that fails
 * (preg_replace() returns null, e.g. because $text contains bytes
 * that are not valid UTF-8), the same patterns are retried in byte
 * mode, and if that fails as well the text is handed to strip_tags()
 * unmodified. The input is therefore never silently replaced by an
 * empty string just because a regular expression could not run.
 *
 * @param string $text HTML markup to reduce to plain text.
 * @return string The text with all tags removed.
 */
function strip_html_tags( $text )
{
    // Elements whose content is never displayed as text: the whole
    // element, content included, is replaced by a single space.
    $invisible = array(
        '@<head[^>]*?>.*?</head>@si',
        '@<style[^>]*?>.*?</style>@si',
        '@<script[^>]*?.*?</script>@si',
        '@<object[^>]*?.*?</object>@si',
        '@<embed[^>]*?.*?</embed>@si',
        '@<applet[^>]*?.*?</applet>@si',
        '@<noframes[^>]*?.*?</noframes>@si',
        '@<noscript[^>]*?.*?</noscript>@si',
        '@<noembed[^>]*?.*?</noembed>@si',
    );

    // Block-level opening/closing tags: a newline is inserted in front
    // of the tag so adjacent words stay separated once it is stripped.
    $blocks = array(
        '@</?((address)|(blockquote)|(center)|(del))@i',
        '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@i',
        '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@i',
        '@</?((table)|(th)|(td)|(caption))@i',
        '@</?((form)|(button)|(fieldset)|(legend)|(input))@i',
        '@</?((label)|(select)|(optgroup)|(option)|(textarea))@i',
        '@</?((frameset)|(frame)|(iframe))@i',
    );

    $patterns = array_merge( $invisible, $blocks );

    // Build the replacement list from the pattern counts so the two
    // lists can never drift out of step.
    $replacements = array_merge(
        array_fill( 0, count( $invisible ), ' ' ),
        array_fill( 0, count( $blocks ), "\n\$0" )
    );

    // Preferred pass: identical patterns with the "u" (UTF-8) modifier.
    $utf8_patterns = array();
    foreach ( $patterns as $pattern ) {
        $utf8_patterns[] = $pattern . 'u';
    }
    $stripped = preg_replace( $utf8_patterns, $replacements, $text );

    if ( $stripped === null ) {
        // UTF-8 mode rejects subjects with invalid byte sequences.
        // The patterns contain only ASCII, so byte mode is a safe retry.
        $stripped = preg_replace( $patterns, $replacements, $text );
    }

    if ( $stripped === null ) {
        // Still failing (for example a PCRE limit was hit): keep the
        // caller's text and let strip_tags() do what it can.
        $stripped = $text;
    }

    return strip_tags( $stripped );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment