Created
September 22, 2011 10:16
-
-
Save muratpurc/1234477 to your computer and use it in GitHub Desktop.
PHP: Extended resource/file/URL link check with redirect handling (Linkchecker)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Checks HTTP links by sending a HEAD request and parsing the response head.
 * Based on function phpLinkCheck from Johannes Froemter <[email protected]>, 2001-04-14.
 *
 * Usage:
 * <code>
 * $url = 'http://www.google.de/?q=foobar';
 * $res = mp_linkCheck($url, array('max_redirects' => 3));
 *
 * // key 'Status-Code' will contain the HTTP status code (e.g. 200 or 404).
 * echo $res['Status-Code'] . "\n";
 *
 * // key 'Location-Status-Code' will contain the status code of the new location,
 * // in case of a 3xx code (redirection) and if following redirects is enabled.
 * echo $res['Location-Status-Code'] . "\n";
 *
 * // dump variable to see the complete result
 * print_r($res);
 * </code>
 *
 * Result keys: 'Status-Line', 'HTTP-Version', 'Status-Code', 'Reason-Phrase',
 * 'Response-Class' (null for status codes outside 1xx-5xx) and one entry per
 * response header (key = header name as sent by the server; the redirect target
 * is always available as 'Location'). When a redirect has been followed:
 * 'Location-Status-Code' (status code of the target or false if the check of the
 * target failed) and 'Location-Response' (complete result of the target check).
 *
 * @param  string  $url      Url to check. A missing scheme is treated as "http://".
 * @param  array   $options  Options array with following feasible values:
 *                           - $options['max_redirects'] (int) Maximum number of redirects to follow,
 *                             default value is 0 (redirects are reported but not followed).
 *                           - $options['port'] (int) The port for the connection, default value is 80.
 *                             Url containing a port definition overwrites the port in options!
 *                           - $options['socket_timeout'] (int) Socket timeout in seconds, default value is 30.
 *                           - $options['user_agent'] (string) User agent string.
 *                           The key 'this.counter' is used internally to count the recursion depth.
 * @return array|false  Result array as described above, or false if the url is not a
 *                      usable http url, the connection failed or the response could
 *                      not be parsed.
 */
function mp_linkCheck($url, array $options = array()) {
    $url = trim((string) $url);
    if (strpos($url, '://') === false) {
        $url = "http://$url";
    }

    // parse url, parse_url() returns false for seriously malformed urls
    $comp = parse_url($url);
    if (!is_array($comp) || !isset($comp['scheme']) || !isset($comp['host'])) {
        return false;
    }
    if (strtolower($comp['scheme']) != 'http') {
        return false;
    }

    // options check
    if (!isset($options['max_redirects']) || (int) $options['max_redirects'] < 0) {
        $options['max_redirects'] = 0;
    } else {
        $options['max_redirects'] = (int) $options['max_redirects'];
    }
    if (!isset($options['port']) || (int) $options['port'] < 1) {
        $options['port'] = 80;
    } else {
        $options['port'] = (int) $options['port'];
    }
    // recursion depth: 1 for the initial call, incremented for each followed redirect
    $options['this.counter'] = (isset($options['this.counter'])) ? (int) $options['this.counter'] + 1 : 1;
    if (!isset($options['socket_timeout']) || (int) $options['socket_timeout'] < 1) {
        $options['socket_timeout'] = 30;
    } else {
        $options['socket_timeout'] = (int) $options['socket_timeout'];
    }
    if (!isset($options['user_agent'])) {
        $options['user_agent'] = null;
    } else {
        $options['user_agent'] = trim($options['user_agent']);
    }

    $host = $comp['host'];
    $port = (isset($comp['port'])) ? $comp['port'] : $options['port'];

    // request target: an url without path (http://host) has to be requested as "/".
    // The fragment is deliberately not sent, it is not part of a HTTP request target.
    $path = (isset($comp['path']) && $comp['path'] !== '') ? $comp['path'] : '/';
    $requestTarget = $path;
    if (isset($comp['query'])) {
        $requestTarget .= '?' . $comp['query'];
    }

    // open connection
    if (!$fp = fsockopen($host, $port, $errno, $errstr, $options['socket_timeout'])) {
        return false;
    }
    // the timeout passed to fsockopen() only covers the connect, not the reads
    stream_set_timeout($fp, $options['socket_timeout']);

    // compose request, a non default port has to be part of the Host header
    $hostHeader = ($port == 80) ? $host : $host . ':' . $port;
    $httpRequest = "HEAD " . $requestTarget . " HTTP/1.1\r\n"
                 . "Host: " . $hostHeader . "\r\n";
    if ($options['user_agent']) {
        $httpRequest .= "User-Agent: " . $options['user_agent'] . "\r\n";
    }
    $httpRequest .= "Connection: close\r\n\r\n";

    // send request and read response head
    if (fputs($fp, $httpRequest) === false) {
        fclose($fp);
        return false;
    }
    $httpResponse = '';
    while (!feof($fp)) {
        $line = fgets($fp);
        if ($line === false) {
            // read error or timeout, feof() would never become true
            break;
        }
        $httpResponse .= $line;
        if ($line === "\r\n" || $line === "\n") {
            // empty line terminates the head, a body (if any) is not of interest
            break;
        }
    }
    fclose($fp);

    // extract status line
    if (!preg_match('=^(HTTP/\d+\.\d+) (\d{3}) ([^\r\n]*)=', $httpResponse, $matches)) {
        return false;
    }
    $http = array();
    $http['Status-Line']   = $matches[0];
    $http['HTTP-Version']  = $matches[1];
    $http['Status-Code']   = $matches[2];
    $http['Reason-Phrase'] = $matches[3];

    $rclasses = array('Informational', 'Success', 'Redirection', 'Client Error', 'Server Error');
    $classIndex = (int) $http['Status-Code'][0] - 1;
    $http['Response-Class'] = (isset($rclasses[$classIndex])) ? $rclasses[$classIndex] : null;

    // extract headers, name and value are separated by the first colon
    if (preg_match_all('=^([^:\r\n]+):[ \t]*([^\r\n]*)=m', $httpResponse, $matches, PREG_SET_ORDER)) {
        foreach ($matches as $line) {
            $http[$line[1]] = $line[2];
            if (strtolower($line[1]) == 'location') {
                // header names are case insensitive, provide the target under a fixed key
                $http['Location'] = $line[2];
            }
        }
    }

    // follow the redirect as long as the maximum number of redirects is not reached
    $isRedirect = ($http['Status-Code'][0] == 3 && isset($http['Location']) && $http['Location'] !== '');
    if ($isRedirect && $options['this.counter'] <= $options['max_redirects']) {
        $location = $http['Location'];
        if (strpos($location, '://') === false) {
            if (substr($location, 0, 2) == '//') {
                // scheme relative reference
                $location = 'http:' . $location;
            } else {
                if ($location[0] != '/') {
                    // path relative reference, resolve against the directory of the current path
                    $location = substr($path, 0, strrpos($path, '/') + 1) . $location;
                }
                $location = 'http://' . $hostHeader . $location;
            }
        }
        $target = mp_linkCheck($location, $options);
        $http['Location-Status-Code'] = (is_array($target)) ? $target['Status-Code'] : false;
        $http['Location-Response']    = $target;
    }

    return $http;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment