Skip to content

Instantly share code, notes, and snippets.

@elialejandro
Created July 22, 2015 05:00
Show Gist options
  • Save elialejandro/ccc5b7d3a2362750abbb to your computer and use it in GitHub Desktop.
Save elialejandro/ccc5b7d3a2362750abbb to your computer and use it in GitHub Desktop.
<?php
// Address of the page that is downloaded and scanned for <img> tags further down.
$url = 'http://consultas.curp.gob.mx/CurpSP/';
/**
 * Download a URL with cURL while sending browser-like request headers.
 *
 * The request carries a fixed set of Accept / cache / keep-alive headers,
 * a Googlebot User-Agent string and a google.com referer, advertises gzip
 * and deflate content encodings, and is capped at 10 seconds.
 *
 * @param string $url Address to request.
 *
 * @return string|false The response body, or false when curl_exec() fails.
 */
function disguise_curl($url)
{
    // Header set modelled on what Firefox 2.0.0.6 sends.
    $requestHeaders = array(
        'Accept: text/xml,application/xml,application/xhtml+xml,'
            . 'text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Cache-Control: max-age=0',
        'Connection: keep-alive',
        'Keep-Alive: 300',
        'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Language: en-us,en;q=0.5',
        'Pragma: ', // browsers keep this blank.
    );

    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
        CURLOPT_HTTPHEADER     => $requestHeaders,
        CURLOPT_REFERER        => 'http://www.google.com',
        CURLOPT_ENCODING       => 'gzip,deflate',
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_RETURNTRANSFER => 1, // hand the body back instead of printing it
        CURLOPT_TIMEOUT        => 10,
    ));

    // Run the transfer, release the handle, then give the result to the caller.
    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// Download the page, then print the full URL of every <img> it contains.
$html = disguise_curl($url);
if ($html === false) {
    // disguise_curl() passes through curl_exec()'s false on failure; report
    // it instead of silently parsing an empty document.
    trigger_error('Could not download ' . $url, E_USER_WARNING);
    $html = '';
}

// Remove any <html ...> / </html> tags the page already has (any letter case,
// with or without attributes) and wrap the remainder in exactly one pair.
$text = '<html>' . preg_replace('/<\/?html\b[^>]*>/i', '', $html) . '</html>';
// $xml = new SimpleXMLElement($text);

// Downloaded markup is rarely valid; collect libxml's parse complaints
// internally rather than letting each one surface as a PHP warning.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$doc = new DOMDocument();
// The leading XML declaration is an encoding hint so the input is read as UTF-8.
$doc->loadHTML('<?xml encoding="UTF-8">' . $text);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// include_once so the debug helper is never loaded a second time.
include_once 'Zend/Debug.php';

// Query the document once and reuse the node list for both the dump and the loop.
$images = $doc->getElementsByTagName("img");
Zend_Debug::dump($images);
foreach ($images as $item) {
    $src = $item->getAttribute("src");
    Zend_Debug::dump($src);
    // Reuse $url as the base instead of repeating the address literal.
    echo $url . $src;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment