Created
May 16, 2012 06:39
-
-
Save fedmich/2708094 to your computer and use it in GitHub Desktop.
GetHTML()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* @author Fedmich | |
* @version v1.1 | |
* File caching version | |
*/ | |
/**
 * Fetch a page, serving it from a file cache when a fresh copy exists.
 *
 * Cached pages live in a "cache" directory next to this file, one file per
 * URL, named page_<md5 of the URL>.tmp. A cached copy is fresh while its
 * modification time is less than $secs seconds old.
 *
 * @param string $url  Address of the page to fetch.
 * @param int    $secs Maximum age of a cached copy, in seconds.
 * @return string|false Page body from the cache or from curl_page(); when the
 *                      fetch yields no content, curl_page()'s result is
 *                      returned unchanged and nothing is cached.
 */
function GetHTML($url, $secs = 900) {
    $md5 = md5($url);
    $dir = dirname(__FILE__) . '/cache';
    $file = "$dir/page_$md5.tmp";

    if (is_file($file) && (time() - filemtime($file)) < $secs) {
        $content = file_get_contents($file);
        // Compare against '' explicitly so a body such as "0" still counts
        // as a cache hit; an empty or unreadable file is a miss.
        if ($content !== false && $content !== '') {
            return $content;
        }
    }

    $content = curl_page($url);

    // Never cache a failed or empty fetch: writing it would replace the
    // previous copy with an empty file.
    if (!is_string($content) || $content === '') {
        return $content;
    }

    // Create the cache directory on first use. The trailing is_dir() covers
    // a concurrent request that created it between the check and mkdir().
    if (is_dir($dir) || mkdir($dir, 0775, true) || is_dir($dir)) {
        // LOCK_EX keeps two concurrent writers from interleaving their output.
        file_put_contents($file, $content, LOCK_EX);
    }

    return $content;
}
/**
 * Download a URL with cURL and return the response body.
 *
 * Sends a desktop Chrome user agent, follows up to 10 redirects and gives up
 * after 10 seconds.
 *
 * @param string $url Address to download.
 * @return string|false Response body; '' when the final HTTP status is 404;
 *                      false when the handle cannot be created or the
 *                      transfer itself fails.
 */
function curl_page($url) {
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // CURLOPT_ENCODING controls the Accept-Encoding request header, so it
    // takes a content coding (gzip, deflate, ...), not a charset. The empty
    // string advertises every coding this cURL build can decode.
    curl_setopt($curl, CURLOPT_ENCODING, '');
    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is because callers may rely on fetching from
    // hosts with self-signed certificates; enable it if that is not needed.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);

    $curlResult = curl_exec($curl);
    $httpStatus = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // A missing page is reported as an empty body rather than the server's
    // 404 error document.
    if ($httpStatus === 404) {
        return '';
    }

    return $curlResult;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* @author Fedmich | |
* @version v1.3 | |
* Memcache caching version (file caching is used when the Memcache extension is not loaded)
*/ | |
/**
 * Fetch a page, serving it from a cache when a fresh copy exists.
 *
 * When the Memcache extension is loaded and the server on localhost:11211
 * accepts a connection, pages are cached there under the key
 * page_<md5 of the URL> with $secs as the expiry. Otherwise pages are cached
 * as files in a "cache" directory next to this file, named
 * page_<md5 of the URL>.tmp, and are fresh while their modification time is
 * less than $secs seconds old.
 *
 * A cache failure never aborts the request: an unreachable Memcache server
 * falls back to the file cache, and a failed Memcache write raises an
 * E_USER_WARNING while the fetched page is still returned.
 *
 * @param string $url  Address of the page to fetch.
 * @param int    $secs Maximum age of a cached copy, in seconds.
 * @return string|false Page body from the cache or from curl_page(); when the
 *                      fetch yields no content, curl_page()'s result is
 *                      returned unchanged and nothing is cached.
 */
function GetHTML($url, $secs = 900) {
    $md5 = md5($url);
    $dir = dirname(__FILE__) . '/cache';
    $file = "$dir/page_$md5.tmp";
    $mem_key = "page_$md5";

    // Use Memcache only when the extension exists AND the server answers.
    $memcache = null;
    if (class_exists('Memcache')) {
        $candidate = new Memcache;
        if ($candidate->connect('localhost', 11211)) {
            $memcache = $candidate;
        }
    }

    if ($memcache !== null) {
        $content = $memcache->get($mem_key);
        if (is_string($content) && $content !== '') {
            return $content;
        }
    } elseif (is_file($file) && (time() - filemtime($file)) < $secs) {
        $content = file_get_contents($file);
        // Compare against '' explicitly so a body such as "0" still counts
        // as a cache hit; an empty or unreadable file is a miss.
        if ($content !== false && $content !== '') {
            return $content;
        }
    }

    $content = curl_page($url);

    // Never cache a failed or empty fetch.
    if (!is_string($content) || $content === '') {
        return $content;
    }

    if ($memcache !== null) {
        // Memcache treats an expiry above 30 days as an absolute Unix
        // timestamp, so longer lifetimes must be converted to one.
        $expire = ($secs > 2592000) ? time() + $secs : $secs;
        if (!$memcache->set($mem_key, $content, MEMCACHE_COMPRESSED, $expire)) {
            trigger_error("Can't save to Memcache", E_USER_WARNING);
        }
        return $content;
    }

    // Create the cache directory on first use. The trailing is_dir() covers
    // a concurrent request that created it between the check and mkdir().
    if (is_dir($dir) || mkdir($dir, 0775, true) || is_dir($dir)) {
        // LOCK_EX keeps two concurrent writers from interleaving their output.
        file_put_contents($file, $content, LOCK_EX);
    }

    return $content;
}
/**
 * Download a URL with cURL and return the response body.
 *
 * Sends a desktop Chrome user agent, follows up to 10 redirects and gives up
 * after 10 seconds.
 *
 * @param string $url Address to download.
 * @return string|false Response body; '' when the final HTTP status is 404;
 *                      false when the handle cannot be created or the
 *                      transfer itself fails.
 */
function curl_page($url) {
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // CURLOPT_ENCODING controls the Accept-Encoding request header, so it
    // takes a content coding (gzip, deflate, ...), not a charset. The empty
    // string advertises every coding this cURL build can decode.
    curl_setopt($curl, CURLOPT_ENCODING, '');
    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is because callers may rely on fetching from
    // hosts with self-signed certificates; enable it if that is not needed.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);

    $curlResult = curl_exec($curl);
    $httpStatus = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // A missing page is reported as an empty body rather than the server's
    // 404 error document.
    if ($httpStatus === 404) {
        return '';
    }

    return $curlResult;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment