Last active
June 8, 2022 06:12
-
-
Save b-alidra/73a5080c2f90243a9d2e9f7ad370df53 to your computer and use it in GitHub Desktop.
Strip quotes from html emails in PHP. See https://b-alidra.com/strip-quoted-text-from-html-emails/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Remove the quoted parts from the message body.
 *
 * The body is parsed as HTML and the following quote markers are removed:
 *  - Standard <blockquote>...</blockquote> (the original author also lists
 *    the OSX Mail Client as handled; presumably through this rule)
 *  - Thunderbird (div.moz-cite-prefix)
 *  - Gmail (div.gmail_extra, div.gmail_quote)
 *  - Yahoo (div.yahoo_quoted)
 *  - Outlook (the element containing <hr id="stopSpelling">)
 *  - Roundcube (the French "Le dd-mm-yyyy hh:mm, ... a écrit :" intro line)
 *
 * Afterwards, <p>, <span>, <b>, <strong> and <div> elements whose content is
 * only whitespace and/or non-breaking spaces are removed as well.
 *
 * The given message is modified in place: its body property is overwritten
 * with the cleaned HTML.
 *
 * @param Message $message
 *   The incoming or outgoing message to be cleaned
 *
 * @return Message
 *   The same message instance, with its body cleaned
 */
public static function strip_quotes_from_message(Message $message)
{
    $els_to_remove = [
        'blockquote',                         // Standard quote block tag
        'div.moz-cite-prefix',                // Thunderbird
        'div.gmail_extra', 'div.gmail_quote', // Gmail
        'div.yahoo_quoted',                   // Yahoo
    ];

    $dom = new PHPHtmlParser\Dom;
    $dom->load($message->body);

    foreach ($els_to_remove as $el) {
        foreach ($dom->find($el)->toArray() as $found) {
            $found->delete();
        }
    }

    // Outlook doesn't respect
    // http://www.w3.org/TR/1998/NOTE-HTMLThreading-0105#Appendix%20B
    // We need to detect quoted replies "by hand"
    //
    // Example of Outlook quote:
    //
    // <div>
    //   <hr id="stopSpelling">
    //   Date: Fri. 20 May 2016 17:40:24 +0200<br>
    //   Subject: Votre facture Selon devis DEV201605201<br>
    //   From: sender@example.com<br>
    //   To: recipient@example.com<br>
    //   Lorem ipsum dolor sit amet consectetur adipiscing...
    // </div>
    //
    // The idea is to delete #stopSpelling's parent...
    $hr = $dom->find('#stopSpelling', /*nth result*/0);
    if (null !== $hr) {
        $container = $hr->getParent();
        // ...but only when that parent is a real element. If the <hr> sits at
        // the top level, its parent is the DOM root: deleting it would throw
        // away the whole message instead of just the quote, so leave the
        // document untouched in that case.
        // NOTE(review): assumes the root node is the only node whose
        // getParent() returns null - confirm against the PHPHtmlParser version
        // in use.
        if (null !== $container && null !== $container->getParent()) {
            $container->delete();
        }
    }

    // Roundcube adds a <p> with a sentence like this one, just
    // before the quote:
    // "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
    // Let's remove it
    $pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a écrit :/';
    foreach ($dom->find('p')->toArray() as $p) {
        if (1 === preg_match($pattern, $p->text())) {
            $p->delete();
        }
    }

    // Let's remove empty tags like <p>&nbsp;</p>...
    // Non-breaking spaces (as entities or as the raw UTF-8 character) are not
    // stripped by trim(), so turn them into plain spaces first.
    $nbsp_forms = ['&nbsp;', '&#160;', "\xC2\xA0"];
    foreach ($dom->find('p,span,b,strong,div')->toArray() as $e) {
        $html = trim(str_replace($nbsp_forms, ' ', $e->innerHtml()));
        // Strict comparison on purpose: empty() would also treat the string
        // "0" as empty and wrongly delete an element such as <b>0</b>.
        if ('' === $html) {
            $e->delete();
        }
    }

    $message->body = $dom->root->innerHtml();

    return $message;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment