Skip to content

Instantly share code, notes, and snippets.

@zachbrowne
Created October 31, 2011 06:38
Show Gist options
  • Save zachbrowne/1327029 to your computer and use it in GitHub Desktop.
Save zachbrowne/1327029 to your computer and use it in GitHub Desktop.
Get Title and Meta Keywords from any site with PHP
<?php
/**
 * Download a page and extract its <title> text and its named <meta> tags.
 *
 * The page is fetched through getUrlContents(), so whatever that function
 * does about redirects applies here as well.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return array|false False when the page could not be downloaded; otherwise
 *                     array(
 *                         'title'    => string|null, // null when no <title> was found
 *                         'metaTags' => array        // keyed by lower-cased meta name
 *                     )
 *                     Each metaTags entry is
 *                     array('html' => entity-encoded original tag, 'value' => content attribute).
 */
function getUrlData($url)
{
$contents = getUrlContents($url);
if (!is_string($contents))
{
return false;
}

$title = null;
// Accept attributes on the opening tag (<title id="x">) and ">" inside the
// title text; strip_tags() then removes any markup nested in the title.
if (preg_match('/<title(?:\s[^>]*)?>(.*?)<\/title>/si', $contents, $match))
{
$title = strip_tags($match[1]);
}

// <meta name="..." content="..."> : group 1 = name, group 2 = content.
$nameFirst = getUrlDataExtractMetaTags(
$contents,
'/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' .'[lang="]*[^>"]*["]*'.'[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si',
1,
2
);
// <meta content="..." name="..."> : group 1 = content, group 2 = name.
$contentFirst = getUrlDataExtractMetaTags(
$contents,
'/<[\s]*meta[\s]*content="?' . '([^>"]*)"?[\s]*' .'[lang="]*[^>"]*["]*'.'[\s]*name="?([^>"]*)"?[\s]*[\/]?[\s]*>/si',
2,
1
);

// Pages frequently mix both attribute orders, so both result sets are kept.
// The array union keeps the name-first entry when a name occurs in both.
// $metaTags is always an array, so callers can count() it safely.
$metaTags = $nameFirst + $contentFirst;

return array (
'title' => $title,
'metaTags' => $metaTags
);
}

/**
 * Helper for getUrlData(): collect the <meta> tags matched by one pattern.
 *
 * @param string $contents   HTML to scan.
 * @param string $pattern    PCRE pattern with two capture groups (name and content).
 * @param int    $nameGroup  Index of the capture group holding the meta name.
 * @param int    $valueGroup Index of the capture group holding the content value.
 *
 * @return array Map of normalised meta name => array('html' => ..., 'value' => ...);
 *               empty when nothing matched or the regex engine reported an error.
 */
function getUrlDataExtractMetaTags($contents, $pattern, $nameGroup, $valueGroup)
{
$tags = array();
// preg_match_all() yields 0 for "no match" and false on a PCRE error.
if (!preg_match_all($pattern, $contents, $match))
{
return $tags;
}
foreach ($match[0] as $i => $original)
{
// Normalise the name: lower-case, without stray quotes or slashes.
$metaname = strtolower($match[$nameGroup][$i]);
$metaname = str_replace(array("'", '/'), '', $metaname);
// A later tag with the same name overwrites an earlier one.
$tags[$metaname] = array (
'html' => htmlentities($original),
'value' => $match[$valueGroup][$i]
);
}
return $tags;
}
/**
 * Download a document and follow HTML <meta http-equiv="refresh"> redirects.
 *
 * @param string   $url                 Location handed to file_get_contents().
 * @param int|null $maximumRedirections Maximum number of meta-refresh hops to
 *                                      follow; null means no limit.
 * @param int      $currentRedirection  Hops taken so far. Used by the recursion;
 *                                      callers normally leave it at 0.
 *
 * @return string|false Body of the final document, or false when the download
 *                      fails or the redirect limit is reached while the current
 *                      page still asks for another redirect.
 */
function getUrlContents($url, $maximumRedirections = null, $currentRedirection = 0)
{
// file_get_contents('') throws a ValueError on PHP 8, which "@" cannot
// silence, so reject unusable locations up front.
if (!is_string($url) || $url === '')
{
return false;
}

// "@" keeps the download best-effort: a failure is reported to the caller
// through the false return value instead of a PHP warning.
$contents = @file_get_contents($url);
if (!is_string($contents))
{
return false;
}

// Check if we need to go somewhere else
preg_match_all('/<[\s]*meta[\s]*http-equiv="?REFRESH"?' . '[\s]*content="?[0-9]*;[\s]*URL[\s]*=[\s]*([^>"]*)"?' . '[\s]*[\/]?[\s]*>/si', $contents, $match);
$isRedirect = is_array($match) && count($match) == 2 && count($match[1]) == 1;
if (!$isRedirect)
{
return $contents;
}

$target = trim($match[1][0]);
if ($target === '')
{
// A refresh without a target reloads the same page: nothing to follow.
return $contents;
}

if (!isset($maximumRedirections) || $currentRedirection < $maximumRedirections)
{
return getUrlContents($target, $maximumRedirections, $currentRedirection + 1);
}

// Redirect limit exhausted while the page still points elsewhere.
return false;
}
//------------------Usage--------------------------------
// Example: fetch a page and print its title, description and keywords.
// This section stays inside the PHP block so that it is executed rather
// than sent to the browser as literal text.
$Domain = 'http://www.samplephpcodes.com'; // website (a plain URL, not HTML markup)
$result = getUrlData($Domain);

// Defaults used when the download fails or a field is missing/empty.
$title = "No Data Available";
$description = "No Data Available";
$keywords = "No Data Available";

// getUrlData() returns false on failure, and a page may lack any of the
// tags, so every level is checked before it is read.
if (is_array($result)) {
    if (isset($result['title']) && (string) $result['title'] !== '') {
        $title = $result['title'];
    }
    if (isset($result['metaTags']['description']['value'])
        && (string) $result['metaTags']['description']['value'] !== '') {
        $description = $result['metaTags']['description']['value'];
    }
    if (isset($result['metaTags']['keywords']['value'])
        && (string) $result['metaTags']['keywords']['value'] !== '') {
        $keywords = $result['metaTags']['keywords']['value'];
    }
}

// The text comes from a remote page and is therefore untrusted: escape it
// for HTML output. double_encode is off because attribute values taken from
// HTML source may already contain entities such as &amp;.
echo '<br>Title - ' . htmlspecialchars($title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
echo '<br>Description - ' . htmlspecialchars($description, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
echo '<br>Keywords - ' . htmlspecialchars($keywords, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment