Created
September 4, 2013 14:36
-
-
Save Aaron3/6437837 to your computer and use it in GitHub Desktop.
Cleanup everything dumb authors can screw up on a website. (Keeping the rules readable. Efficiency be damned)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Register the cleanup routine on WordPress' `content_save_pre` filter so the
// content passes through cleanup_annoying_writers() before it is saved.
add_filter('content_save_pre', 'cleanup_annoying_writers');
/**
 * Filter callback for `content_save_pre`: normalises author-supplied markup.
 *
 * The rules are applied strictly in order and are written for readability
 * rather than speed. In summary the function:
 *  - strips <script>, <style>, <div>, <p>, <span> and <center> tags (the text
 *    inside them is kept),
 *  - removes inline style="" attributes, then re-adds a counter-reset style
 *    for any start="" attribute,
 *  - rewrites <i>/<b> to <em>/<strong>,
 *  - replaces non-breaking spaces (character and entity) with plain spaces,
 *  - tidies whitespace around </a>, </em> and before punctuation,
 *  - removes <!--more--> markers and a handful of empty tag pairs,
 *  - converts <br> to newlines, curly quotes to straight quotes, and
 *    normalises line endings.
 *
 * NOTE(review): the content is passed through stripslashes() and returned
 * unslashed; confirm that is what the caller of this hook expects.
 *
 * @param string $content Raw post content handed to the filter.
 * @return string The cleaned and trimmed content.
 */
function cleanup_annoying_writers($content)
{
    // Unhook using the same plain-function callable that was registered.
    // The previous array($this, ...) form referenced $this outside of any
    // object: that throws an Error on PHP 7.1+ and could never match the
    // registered callback. The function_exists() guard keeps the routine
    // callable outside WordPress (e.g. from a unit test).
    if (function_exists('remove_filter')) {
        remove_filter('content_save_pre', 'cleanup_annoying_writers');
    }

    $content = trim(stripslashes($content));

    // No JS: the tags are dropped, whatever was between them stays as text.
    if (strpos($content, '</script>') !== false) {
        $content = preg_replace('/<\/script>/', "\n\n", $content);
        $content = preg_replace('/<script[^>]*>/', "\n\n", $content);
    }

    // No CSS blocks.
    if (strpos($content, '</style>') !== false) {
        $content = preg_replace('/<\/style>/', "\n\n", $content);
        $content = preg_replace('/<style[^>]*>/', "\n\n", $content);
    }

    // No divs: each tag (with surrounding whitespace) becomes a paragraph break.
    if (strpos($content, '</div>') !== false) {
        $content = preg_replace('/[\s]*<\/div>[\s]*/', "\n\n", $content);
        $content = preg_replace('/[\s]*<div[^>]*>[\s]*/', "\n\n", $content);
    }

    // No paragraph tags. The \b keeps the pattern from also swallowing the
    // opening tag of <pre>, <picture>, <param>, ... which merely start with "p".
    if (strpos($content, '</p>') !== false) {
        $content = preg_replace('/[\s]*<\/p>[\s]*/', "\n\n", $content);
        $content = preg_replace('/[\s]*<p\b[^>]*>[\s]*/', "\n\n", $content);
    }

    $content = trim($content);

    // No spans.
    if (strpos($content, '</span>') !== false) {
        $content = preg_replace('/<\/span>/', "", $content);
        $content = preg_replace('/<span[^>]*>/', "", $content);
    }

    // No inline CSS (double- or single-quoted, non-empty values only).
    if (strpos($content, 'style=') !== false) {
        $content = preg_replace('/style="[^"]+"/', "", $content);
        $content = preg_replace("/style='[^']+'/", "", $content);
    }

    // ...except counter-resets: every start="N" gets a matching inline style.
    if (strpos($content, 'start=') !== false) {
        $content = preg_replace('/start="([^"]+)"/', 'start="${1}" style="counter-reset:headers ${1};"', $content);
        $content = preg_replace("/start='([^']+)'/", 'start="${1}" style="counter-reset:headers ${1};"', $content);
    }

    // No <center> tags.
    if (strpos($content, '<center>') !== false) {
        $content = str_replace('</center>', '', $content);
        $content = str_replace('<center>', '', $content);
    }

    // <em>, not <i>.
    if (strpos($content, '<i>') !== false) {
        $content = str_replace('<i>', '<em>', $content);
        $content = str_replace('</i>', '</em>', $content);
    }

    // <strong>, not <b>.
    if (strpos($content, '<b>') !== false) {
        $content = str_replace('<b>', '<strong>', $content);
        $content = str_replace('</b>', '</strong>', $content);
    }

    // Non-breaking space *characters* (typically pasted from Word) become
    // plain spaces. Written as an explicit escape because a literal U+00A0 is
    // invisible and easily lost. Assumes the content is UTF-8 encoded.
    if (strpos($content, "\xC2\xA0") !== false) {
        $content = str_replace("\xC2\xA0", ' ', $content);
    }

    // Non-breaking space *entities* become plain spaces as well.
    if (strpos($content, '&nbsp;') !== false) {
        $content = str_replace('&nbsp;', ' ', $content);
    }

    // Move whitespace out of the end of <a> and <em> elements: if a letter,
    // digit or space follows the closing tag the whitespace is moved behind
    // the tag, otherwise it is deleted.
    if (strpos($content, ' </a>') !== false) {
        $content = preg_replace('/[\s]+<\/a>(?=[a-zA-Z0-9 ])/', '</a> ', $content);
        $content = preg_replace('/[\s]+<\/a>/', '</a>', $content);
    }
    if (strpos($content, ' </em>') !== false) {
        $content = preg_replace('/[\s]+<\/em>(?=[a-zA-Z0-9 ])/', '</em> ', $content);
        $content = preg_replace('/[\s]+<\/em>/', '</em>', $content);
        $content = trim($content);
    }

    // Drop a single space that sits directly before . ! ? or ,
    $content = preg_replace('/ ([\.\!\?\,])/', '${1}', $content);
    $content = trim($content);

    // No <!--more--> markers.
    if (strpos($content, '<!--more-->') !== false) {
        $content = str_replace('<!--more-->', '', $content);
    }

    // No entity ampersands in running text: " &amp; " becomes " & ".
    if (strpos($content, '&') !== false) {
        $content = str_replace(' &amp; ', ' & ', $content);
    }

    // Collapse runs of spaces into one.
    $content = preg_replace('/ {2,}/', ' ', $content);

    // Remove spaces between tags (back-to-back <a> tags are allowed).
    // NOTE(review): the leading "!" in this pattern looks like a leftover
    // delimiter; as written it only matches a literal "!> <", so in practice
    // nothing is removed. It is left unchanged on purpose: dropping the "!"
    // would also delete the space between adjacent inline elements such as
    // "</em> <strong>". Decide which behaviour is wanted before touching it.
    if (strpos($content, '> <') !== false) {
        $content = preg_replace('/!> +<(?!a)/', '><', $content);
    }

    // Remove common empty tag pairs.
    if (strpos($content, '></') !== false) {
        $content = str_replace('<blockquote></blockquote>', '', $content);
        $content = str_replace('<p></p>', '', $content);
        $content = str_replace('<div></div>', '', $content);
        $content = str_replace('<span></span>', '', $content);
        $content = str_replace('<em></em>', '', $content);
        $content = str_replace('<strong></strong>', '', $content);
    }

    // Turn every <br> variant, plus any whitespace after it, into one newline.
    if (strpos($content, '<br') !== false) {
        $content = preg_replace('/<br[\s]*\/?>[\s]*/', "\n", $content);
    }

    // No fancy quotes.
    $content = str_replace('“', '"', $content);
    $content = str_replace('”', '"', $content);
    $content = str_replace('’', "'", $content);
    $content = str_replace('‘', "'", $content);

    /*
     * Optional stricter pass, left disabled: round-trip the markup through
     * DOMDocument so that it is well-formed before it is saved.
     *
     * @$dom->loadHTML('<?xml encoding="UTF-8">' . $content );
     * $content = $dom->saveHTML($dom->getElementsByTagName('body')->item(0));
     * $content = trim(trim(str_replace(array(' ','<body>', '</body>'), array("\n",''), $content)));
     */

    // Windows line endings (CRLF) become LF...
    if (strpos($content, "\r\n") !== false) {
        $content = str_replace("\r\n", "\n", $content);
    }
    // ...and so does any remaining bare carriage return.
    if (strpos($content, "\r") !== false) {
        $content = str_replace("\r", "\n", $content);
    }

    // Never more than one blank line in a row.
    if (strpos($content, "\n\n\n") !== false) {
        $content = preg_replace('!\n\n\n+!', "\n\n", $content);
    }

    //$content = mb_convert_encoding($content, "UTF-8");

    // Trim it all one last time.
    $content = trim($content);

    return $content;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment