Created
October 5, 2016 02:59
-
-
Save lucdkny/acebab4c2d5425f86eca0a6fd452ef54 to your computer and use it in GitHub Desktop.
How to PHP regex match an HTML document's meta description
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Extract the meta description from an HTML document.
 *
 * Finds the first <meta> tag whose name attribute is "description"
 * (case-insensitive) and returns the value of its content attribute,
 * regardless of the order in which the two attributes appear.
 *
 * The value is returned as written in the markup, trimmed of surrounding
 * whitespace; HTML entities are NOT decoded. Only quoted attribute values
 * are recognised.
 *
 * @param mixed $html Raw HTML markup; anything that is not a non-empty string yields null.
 *
 * @return string|null The trimmed description, or null when no matching tag is found.
 */
function parseDescription($html) {
    if (!is_string($html) || $html === '') {
        return null;
    }

    // One pattern covers both attribute orders:
    //  - the lookahead asserts that name="description" occurs somewhere inside
    //    THIS tag ([^>]* cannot run past the tag's closing '>'), so the content
    //    attribute can never be borrowed from a neighbouring <meta> tag;
    //  - \1 and \2 force the closing quote to be the same character as the
    //    opening one, so an apostrophe inside a double-quoted value (or the
    //    reverse) no longer cuts the value short;
    //  - the 's' modifier lets the captured value contain line breaks, and the
    //    negated class [^>] already matches newlines between attributes.
    $pattern = '/<meta\b(?=[^>]*\sname\s*=\s*(["\'])description\1)[^>]*?\scontent\s*=\s*(["\'])(.*?)\2/is';

    $matches = array();
    if (preg_match($pattern, $html, $matches) === 1) {
        // Group 3 is the content value (1 and 2 are the quote characters).
        return trim($matches[3]);
    }

    // No match (or the regex engine reported an error).
    return null;
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment