-
-
Save erigobeli/b1493f7de04564f5a12b842537bcc1e1 to your computer and use it in GitHub Desktop.
Strip quoted text from HTML emails in PHP. See https://b-alidra.com/strip-quoted-text-from-html-emails/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Remove the quoted parts from the message body.
 *
 * The body is parsed as HTML and the following nodes are deleted:
 * - Standard <blockquote>...</blockquote>
 * - Thunderbird citation prefixes (div.moz-cite-prefix)
 * - Gmail quotes (div.gmail_extra, div.gmail_quote)
 * - Yahoo quotes (div.yahoo_quoted)
 * - Outlook quotes (the parent of the first #stopSpelling element)
 * - Roundcube "Le <date> <time>, <name> a écrit :" introduction paragraphs
 * - OSX Mail Client (no dedicated selector here; presumably covered by
 *   the standard <blockquote> rule - to be confirmed)
 * - p, span, b, strong and div elements whose content is blank
 *
 * @param Message $message
 *   The incoming or outgoing message to be cleaned. Its body property
 *   is overwritten with the cleaned HTML.
 *
 * @return Message
 *   The same message instance, with the cleaned body
 */
public static function strip_quotes_from_message(Message $message)
{
    $els_to_remove = [
        'blockquote',                           // Standard quote block tag
        'div.moz-cite-prefix',                  // Thunderbird
        'div.gmail_extra', 'div.gmail_quote',   // Gmail
        'div.yahoo_quoted'                      // Yahoo
    ];

    $dom = new PHPHtmlParser\Dom;
    $dom->load($message->body);

    foreach ($els_to_remove as $el) {
        foreach ($dom->find($el)->toArray() as $found) {
            $found->delete();
        }
    }

    // Outlook doesn't respect
    // http://www.w3.org/TR/1998/NOTE-HTMLThreading-0105#Appendix%20B
    // We need to detect quoted replies "by hand"
    //
    // Example of Outlook quote:
    //
    // <div>
    //   <hr id="stopSpelling">
    //   Date: Fri. 20 May 2016 17:40:24 +0200<br>
    //   Subject: Votre facture Selon devis DEV201605201<br>
    //   From: [email protected]<br>
    //   To: [email protected]<br>
    //   Lorem ipsum dolor sit amet consectetur adipiscing...
    // </div>
    //
    // The idea is to delete #stopSpelling's parent...
    $hr = $dom->find('#stopSpelling', /*nth result*/0);
    if (null !== $hr) {
        $hr_parent = $hr->getParent();
        if (null !== $hr_parent) {
            $hr_parent->delete();
        }
    }

    // Roundcube adds a <p> with a sentence like this one, just
    // before the quote:
    // "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
    // Let's remove it
    $pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a écrit :/';
    foreach ($dom->find('p')->toArray() as $p) {
        if (preg_match($pattern, $p->text())) {
            $p->delete();
        }
    }

    // Let's remove blank tags like <p></p> or <p>&nbsp;</p>...
    // trim() does not strip non-breaking spaces, so they are removed
    // explicitly first (as entities and as the raw UTF-8 bytes of U+00A0).
    // The strict comparison with '' is deliberate: empty() would also
    // treat an element containing just "0" as blank and delete it.
    $blank_tokens = ['&nbsp;', '&#160;', "\xC2\xA0"];
    foreach ($dom->find('p,span,b,strong,div')->toArray() as $e) {
        $html = trim(str_replace($blank_tokens, '', $e->innerHtml()));
        if ('' === $html) {
            $e->delete();
        }
    }

    $message->body = $dom->root->innerHtml();

    return $message;
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sadly, the HTML parser library is no longer actively maintained; see paquettg/php-html-parser#294.
Would you be able to update your script (that appears to be tremendously useful!) so that it works with https://packagist.org/packages/voku/simple_html_dom instead?