Last active
December 29, 2015 07:59
-
-
Save jamescridland/7639769 to your computer and use it in GitHub Desktop.
This script scrapes profile information from a Google+ page, given its URL. The "followers" lookup relies on a CSS class name that will doubtless change, but you can hopefully see how it works.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Scrape basic profile details from a Google+ page and print them.
 *
 * The page at $url['url'] is downloaded and parsed, and the following keys
 * are collected into $found and dumped with print_r():
 *   - followers: first space-separated token of the follower counter text
 *   - name:      "content" attribute of the first itemprop="name" element
 *   - url:       "content" attribute of the first itemprop="url" element
 *   - image:     absolute URL built from the first itemprop="image" element
 *   - id:        last path segment of the profile URL
 *
 * Any value that cannot be located in the markup is reported as null rather
 * than aborting the script with a fatal error.
 */
$url['url'] = 'https://plus.google.com/107170242245169064947/posts';

if (isset($url['url'])) {
    $html = file_get_contents($url['url']);

    if ($html === false || trim($html) === '') {
        // Download failed or returned an empty body: there is nothing to parse.
        echo 'Unable to fetch ' . $url['url'] . ': aborting';
    } else {
        // Real-world HTML is rarely well-formed. Collect libxml's complaints
        // internally instead of silencing every error with "@", then restore
        // whatever error mode was active before.
        $previousErrorMode = libxml_use_internal_errors(true);
        $d = new DOMDocument();
        $d->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $xpath = new DOMXPath($d);

        // Returns the first node matching an XPath expression, or null.
        $firstNode = function ($expression) use ($xpath) {
            $nodes = $xpath->query($expression);
            if ($nodes === false || $nodes->length === 0) {
                return null;
            }
            return $nodes->item(0);
        };

        // Returns the "content" attribute of the first matching element, or null.
        $contentOf = function ($expression) use ($firstNode) {
            $node = $firstNode($expression);
            return ($node instanceof DOMElement) ? $node->getAttribute('content') : null;
        };

        $found = array();

        //This appears twice, and is the total followers for this property
        $followersNode = $firstNode('/html/body//span[@class="d-s o5a"]');
        $found['followers'] = null;
        if ($followersNode !== null) {
            // The counter text is followed by a label; keep the leading token only.
            $parts = explode(' ', trim($followersNode->textContent), 2);
            $found['followers'] = $parts[0];
        }

        $found['name'] = $contentOf("//*[@itemprop='name']");

        // Queried once and reused for both the "url" and "id" entries.
        $profileUrl = $contentOf("//*[@itemprop='url']");
        $found['url'] = $profileUrl;

        $image = $contentOf("//*[@itemprop='image']");
        if ($image === null || $image === '') {
            $found['image'] = null;
        } elseif (strncmp($image, '//', 2) === 0) {
            // Protocol-relative URL: supply the scheme.
            $found['image'] = 'https:' . $image;
        } else {
            // Already absolute (or otherwise not protocol-relative): leave as is.
            $found['image'] = $image;
        }

        $found['id'] = null;
        if ($profileUrl !== null) {
            $slash = strrpos($profileUrl, '/');
            // Without a slash the whole value is the id; do not chop its first character.
            $found['id'] = ($slash === false) ? $profileUrl : substr($profileUrl, $slash + 1);
        }

        print_r($found);
    }
} else {
    echo 'Nothing to do: aborting';
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment