Last active
January 28, 2017 18:39
-
-
Save AlexR1712/846cd5754a8118b029a45b9d480ecf4a to your computer and use it in GitHub Desktop.
Quotes web scraping for http://frasesmotivacion.net/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Fetch the HTTP status code that a URL responds with.
 *
 * Requests the headers with get_headers() and extracts the three-digit status
 * from the first status line of the response.
 *
 * @param string $url Absolute URL to probe.
 *
 * @return string Three-digit status code (e.g. "200"), or an empty string when
 *                no headers were obtained or the status line cannot be parsed.
 */
function get_http_response_code($url)
{
    // get_headers() reports failures (bad URL, DNS error, refused connection)
    // as a PHP warning plus a false return value. The failure is surfaced to
    // the caller through the return value, so the warning is swallowed here.
    set_error_handler(function () {
        return true;
    });
    try {
        $headers = get_headers($url);
    } finally {
        restore_error_handler();
    }

    if (!is_array($headers) || !isset($headers[0])) {
        return '';
    }

    // Parse the status line instead of relying on a fixed offset, so that
    // both "HTTP/1.1 200 OK" and "HTTP/2 200" yield "200".
    if (preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#i', $headers[0], $match) === 1) {
        return $match[1];
    }

    return '';
}
/**
 * Scrape one listing page of frasesmotivacion.net into an array of quotes.
 *
 * The page is first probed with get_http_response_code(); on any status other
 * than 200 the function prints "Error <code>" and returns false. Otherwise the
 * HTML is downloaded and every <blockquote> inside a div with class "quote-v3"
 * is converted into one entry. Blocks that lack the expected second and third
 * <a> elements or the first <img> element are skipped.
 *
 * @param string $page Path appended to the site root, e.g. "/frases-motivadoras".
 *
 * @return array|false|Exception List of arrays with the keys 'text', 'author'
 *                               and 'image'; false on a non-200 response; the
 *                               caught Exception object if one is thrown
 *                               (kept for backward compatibility).
 */
function getQuotes($page="")
{
    $url = 'http://frasesmotivacion.net';

    try {
        $code = get_http_response_code($url.$page);
        if ((int) $code !== 200) {
            echo "Error ".$code;
            return false;
        }

        $raw = @file_get_contents($url.$page);
        // A failed or empty download has nothing to parse; DOMDocument::loadHTML()
        // rejects an empty string, so report "no quotes" instead.
        if ($raw === false || trim($raw) === '') {
            return [];
        }

        // Real-world HTML is rarely valid; collect libxml errors internally
        // rather than emitting warnings, then restore the previous setting.
        $previousSetting = libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $doc->loadHTML($raw);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        $xpath = new DOMXPath($doc);
        $items = $xpath->query('//div[@class="quote-v3"]//blockquote');

        $quotes = [];
        foreach ($items as $item) {
            $links = $item->getElementsByTagName("a");
            $textNode = $links->item(1);
            $authorNode = $links->item(2);
            $imageNode = $item->getElementsByTagName("img")->item(0);

            // Skip blocks that do not match the expected markup instead of
            // dereferencing a missing node.
            if ($textNode === null || $authorNode === null || $imageNode === null) {
                continue;
            }

            // Trim after stripping the prefix so no leading space remains
            // in front of the author's name.
            $author = trim(str_replace('leer más frases de', '', $authorNode->nodeValue));

            // Only prefix the site root when the src is not already an
            // absolute or protocol-relative URL.
            $src = trim($imageNode->getAttribute("src"));
            $isAbsolute = preg_match('#^(?:[a-z][a-z0-9+.\-]*:)?//#i', $src) === 1;

            $quotes[] = [
                'text' => trim($textNode->nodeValue),
                'author' => $author,
                'image' => $isAbsolute ? $src : $url.$src
            ];
        }

        return $quotes;
    } catch (Exception $e) {
        return $e;
    }
}
echo "<pre>";
// Dump the quotes of 30 listing pages. Page 0 is the bare listing path; every
// later page appends "-<number>" to it. (The original author notes that each
// page holds 11 quotes.)
foreach (range(0, 29) as $pageNumber) {
    $path = ($pageNumber > 0)
        ? "/frases-motivadoras-$pageNumber"
        : "/frases-motivadoras";
    var_dump(getQuotes($path));
}
echo "</pre>";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment