Last active
November 18, 2015 23:30
-
-
Save craiga/2723217 to your computer and use it in GitHub Desktop.
Get a URL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once("GetUrlHttpErrorException.php"); | |
include_once(dirname(__FILE__) . "/../removeOldFiles/removeOldFiles.php"); | |
/**
 * Get a URL.
 *
 * Get a URL, optionally caching the response. Will remove old cached files if
 * {@link https://gist.github.com/craiga/3161529 removeOldFiles} is present.
 *
 * The HTTP method is matched case-insensitively and validated before any
 * temporary directory is touched or any request is made.
 *
 * @param string       $url                   The URL to get.
 * @param string       $method                The HTTP method to use (POST, GET or HEAD, any letter case). GET by default.
 * @param array|string $data                  Data to send as part of the request. For GET and HEAD a non-empty array is
 *                                            converted to a query string and appended to the URL (joined with "&" when
 *                                            the URL already has a query string). For POST it is passed to cURL as the
 *                                            POST fields.
 * @param array        $additionalCurlOptions Additional options to be set in cURL. Will override anything set internally in this function.
 * @param int          $tempMaxAge            Maximum allowed age, in seconds, for temporary items. Zero by default, which disables the cache.
 * @param int|float    $tempFreeSpace         Amount of free space to ensure remains when writing temporary files (passed through to removeOldFiles).
 * @param string|null  $tempDir               Where temporary cache and cookie files are written. Will attempt to create this directory if it
 *                                            doesn't exist. Inside sys_get_temp_dir() by default.
 * @param int          $tempPermissions       Permissions to create temporary directories and cache files with. 0700 (only available to the
 *                                            creating user) by default.
 * @param array|null   $responseHeaders       If an array is passed in, it is replaced with the response header lines. Any other value
 *                                            (including the default null) is left untouched.
 *
 * @return string The response body, with the response headers stripped.
 *
 * @throws InvalidArgumentException If the HTTP method is not POST, GET or HEAD.
 * @throws GetUrlHttpErrorException If the server responds with an HTTP status of 400 or above.
 * @throws RuntimeException         If the temporary directory or cache entry can't be prepared, or cURL fails.
 *
 * @author Craig Anderson
 * @link https://gist.github.com/craiga/2723217
 */
function getUrl($url, $method = "GET", $data = array(), $additionalCurlOptions = array(), $tempMaxAge = 0, $tempFreeSpace = 0, $tempDir = null, $tempPermissions = 0700, &$responseHeaders = null)
{
    // Normalise the method once so "get" and "GET" take the same code paths
    // (query string building, caching and cURL setup), and fail fast on
    // anything unsupported before touching the file system.
    $method = strtoupper($method);
    if (!in_array($method, array("POST", "GET", "HEAD"), true)) {
        // TODO: Support methods other than POST, GET and HEAD.
        throw new InvalidArgumentException("Unsupported HTTP method; only POST, GET and HEAD methods are supported.");
    }

    $rawResponse = null;
    $cacheFile = null;

    if (is_null($tempDir)) {
        $tempDir = realpath(sys_get_temp_dir()) . DIRECTORY_SEPARATOR . "getUrlCache";
    }
    if (!is_dir($tempDir)) {
        if (!mkdir($tempDir, $tempPermissions, true)) {
            throw new RuntimeException("Couldn't create temporary directory for cache and cookie files");
        }
        // Permissions set by mkdir are modified by umask; we need to explicitly set them with chmod to ensure they take effect.
        if (!chmod($tempDir, $tempPermissions)) {
            throw new RuntimeException("Couldn't change permissions on newly created temporary directory");
        }
    }
    if (!is_readable($tempDir)) {
        throw new RuntimeException("Temporary directory for cache and cookie files is not readable");
    }
    if (!is_writeable($tempDir)) {
        throw new RuntimeException("Temporary directory for cache and cookie files is not writeable");
    }

    // GET and HEAD carry their data in the query string. Data is appended to
    // an existing query string rather than being silently discarded.
    if (($method === "GET" || $method === "HEAD") && is_array($data) && count($data) > 0) {
        if (strpos($url, "?") === false) {
            $separator = "?";
        } elseif (in_array(substr($url, -1), array("?", "&"), true)) {
            $separator = "";
        } else {
            $separator = "&";
        }
        $url = $url . $separator . http_build_query($data);
    }

    // Only GET requests can be cached.
    if ($method === "GET" && $tempMaxAge > 0) {
        $cacheFile = realpath($tempDir) . DIRECTORY_SEPARATOR . md5($method . $url);
        if (file_exists($cacheFile)) {
            $age = time() - filemtime($cacheFile);
            if ($age < $tempMaxAge) {
                $rawResponse = file_get_contents($cacheFile);
            }
        }
    }

    // removeOldFiles is an optional companion function; clean up only when it has been loaded.
    if (function_exists("removeOldFiles")) {
        removeOldFiles($tempDir, true, $tempMaxAge, $tempFreeSpace, true);
    }

    if (is_null($rawResponse)) {
        $curl = curl_init();
        if ($curl === false) {
            throw new RuntimeException("Couldn't initialise cURL");
        }
        try {
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($curl, CURLOPT_URL, $url);
            curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
            $cookieJar = $tempDir . DIRECTORY_SEPARATOR . getmypid() . ".cookiejar"; // TODO: Test this changes between Apache requests.
            curl_setopt($curl, CURLOPT_COOKIEFILE, $cookieJar);
            curl_setopt($curl, CURLOPT_COOKIEJAR, $cookieJar);
            // Headers are returned inline with the body and split off again below.
            curl_setopt($curl, CURLOPT_HEADER, true);

            if (getenv("http_proxy")) {
                $proxy = getenv("http_proxy");
                $proxyHost = parse_url($proxy, PHP_URL_HOST);
                $proxyPort = parse_url($proxy, PHP_URL_PORT);
                if ($proxyHost && $proxyPort && preg_match("/^\d+$/", $proxyPort)) {
                    curl_setopt($curl, CURLOPT_PROXY, sprintf("%s:%d", $proxyHost, $proxyPort));
                } else {
                    if (!trigger_error(sprintf("http_proxy invalid; \"%s\" should contain a host name and port number", $proxy), E_USER_WARNING)) {
                        throw new RuntimeException("Couldn't trigger warning");
                    }
                }
            }

            // $method was validated on entry, so these three cases are exhaustive.
            switch ($method) {
                case "POST":
                    curl_setopt($curl, CURLOPT_POST, true);
                    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
                    break;
                case "GET":
                    curl_setopt($curl, CURLOPT_HTTPGET, true);
                    break;
                case "HEAD":
                    curl_setopt($curl, CURLOPT_NOBODY, true);
                    break;
            }

            // Applied last so callers can override anything set above.
            curl_setopt_array($curl, $additionalCurlOptions);

            $rawResponse = curl_exec($curl);
            if ($rawResponse === false) {
                throw new RuntimeException(sprintf("cURL Error %d: %s", curl_errno($curl), curl_error($curl)));
            }

            // We do this instead of setting CURLOPT_FAILONERROR so you have an
            // opportunity to examine the response.
            $responseStatus = curl_getinfo($curl, CURLINFO_HTTP_CODE);
            if ($responseStatus >= 400) {
                throw new GetUrlHttpErrorException($responseStatus, $curl, $rawResponse);
            }
            curl_close($curl);
        } catch (Exception $e) {
            // The handle is always valid here because curl_init() is checked before the try block.
            curl_close($curl);
            throw $e;
        }

        if (!is_null($cacheFile)) {
            // Compare against false explicitly: file_put_contents() returns the
            // byte count, so a legitimately empty write would otherwise look like a failure.
            if (@file_put_contents($cacheFile, $rawResponse) === false) {
                throw new RuntimeException("Couldn't write cache entry.");
            }
            if (!chmod($cacheFile, $tempPermissions)) {
                throw new RuntimeException("Couldn't change permissions of cache entry");
            }
        }
    }

    // The raw response may contain several header blocks (e.g. one per
    // redirect) before the body. Leading blocks starting with "HTTP" are
    // headers; everything from the first other block onwards is the body.
    $rawResponseParts = explode("\r\n\r\n", $rawResponse);
    $rawHeaderParts = array();
    $responseParts = array();
    foreach ($rawResponseParts as $rawResponsePart) {
        if (preg_match("/^HTTP/", $rawResponsePart) && count($responseParts) == 0) {
            $rawHeaderParts[] = $rawResponsePart;
        } else {
            $responseParts[] = $rawResponsePart;
        }
    }
    $rawHeaders = implode("\r\n\r\n", $rawHeaderParts);
    $response = implode("\r\n\r\n", $responseParts);

    if (is_array($responseHeaders)) {
        $responseHeaders = explode("\r\n", $rawHeaders);
        // If response headers include an Expires header, set it to the time our cache expires.
        if (!is_null($cacheFile) && file_exists($cacheFile)) {
            foreach ($responseHeaders as $key => $responseHeader) {
                // Header names are case-insensitive and the space after the colon is optional.
                if (preg_match("/^Expires:\s*(.*)$/i", $responseHeader, $matches)) {
                    $theirExpires = strtotime($matches[1]);
                    $ourExpires = filemtime($cacheFile) + $tempMaxAge;
                    if ($ourExpires > $theirExpires) {
                        $responseHeaders[$key] = "Expires: " . date("r", $ourExpires);
                    }
                }
            }
        }
    }

    return $response;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Exception describing an HTTP error response.
 *
 * Carries the HTTP status code, the raw response (headers and body as
 * returned by cURL with CURLOPT_HEADER enabled) and, when a cURL handle is
 * supplied, the result of curl_getinfo(). Any key of that cURL info array can
 * be read through a magic camel-case getter, e.g. getContentType() returns
 * the "content_type" entry and getUrl() returns the "url" entry.
 */
class GetUrlHttpErrorException extends RuntimeException
{
    protected $_httpStatusCode = null;
    protected $_curlInfo = array();
    protected $_rawResponse = null;
    protected $_responseBody = null;
    protected $_responseHeaders = null;

    /**
     * @param int            $httpStatusCode The HTTP status code of the response.
     * @param resource|null  $curl           cURL handle used for the request; its curl_getinfo() data is captured if provided.
     * @param string|null    $rawResponse    The raw response, header blocks followed by the body.
     * @param string         $message        Exception message. When empty, one is generated from the status code
     *                                       (and from the error document when the response is an Amazon S3 XML error).
     * @param int            $code           Exception code.
     * @param Exception|null $previous       Previous exception, if any.
     */
    public function __construct($httpStatusCode, $curl = null, $rawResponse = null, $message = "", $code = 0, Exception $previous = null)
    {
        $this->_httpStatusCode = $httpStatusCode;
        if (!is_null($curl)) {
            $this->_curlInfo = curl_getinfo($curl);
        }
        $this->_rawResponse = $rawResponse;

        if ($message == "") {
            $message = sprintf("Server responded with an HTTP %d error", $httpStatusCode);
            if ($this->getContentType() == "application/xml" && $this->getResponseHeader("Server") == "AmazonS3" && $this->getResponseBody() != "") {
                $s3Message = $this->_extractS3ErrorMessage($this->getResponseBody());
                if (!is_null($s3Message)) {
                    $message = sprintf("Amazon S3 responded with an HTTP %d error (%s)", $httpStatusCode, $s3Message);
                }
            }
        }

        parent::__construct($message, $code, $previous);
    }

    /**
     * @return int The HTTP status code passed to the constructor.
     */
    public function getHttpStatusCode()
    {
        return $this->_httpStatusCode;
    }

    /**
     * @return string|null The raw response exactly as passed to the constructor.
     */
    public function getRawResponse()
    {
        return $this->_rawResponse;
    }

    /**
     * @return string The response body (the raw response with leading header blocks removed).
     */
    public function getResponseBody()
    {
        $this->_parseRawResponse();
        return $this->_responseBody;
    }

    /**
     * @return string All leading header blocks of the raw response, separated by blank lines.
     */
    public function getResponseHeaders()
    {
        $this->_parseRawResponse();
        return $this->_responseHeaders;
    }

    /**
     * Get the response headers as a name => value map.
     *
     * Status lines and blank lines are skipped. When a header occurs more than
     * once (for example across several header blocks), the last value wins.
     *
     * @return array Header values keyed by header name, as sent by the server.
     */
    public function getResponseHeadersAsArray()
    {
        $headerLines = explode("\r\n", $this->getResponseHeaders());
        $headersAsArray = array();
        foreach ($headerLines as $headerLine) {
            // The name is everything up to the FIRST colon; values such as
            // dates contain colons of their own and must stay intact.
            if (preg_match("/^([^:]+):\s*(.*)$/", $headerLine, $matches)) {
                $headersAsArray[$matches[1]] = $matches[2];
            }
        }
        return $headersAsArray;
    }

    /**
     * Get a single response header.
     *
     * @param string $key Header name; matched case-insensitively.
     *
     * @return string|null The header value, or null if the header is not present.
     */
    public function getResponseHeader($key)
    {
        $header = null;
        foreach ($this->getResponseHeadersAsArray() as $name => $value) {
            if (strcasecmp((string) $name, (string) $key) === 0) {
                $header = $value;
            }
        }
        return $header;
    }

    /**
     * Split the raw response into header blocks and body.
     *
     * Leading blocks that start with "HTTP" are treated as headers; everything
     * from the first other block onwards is the body.
     */
    protected function _parseRawResponse()
    {
        // Cast so a null raw response is parsed as an empty string.
        $rawResponseParts = explode("\r\n\r\n", (string) $this->_rawResponse);
        $rawHeaderParts = array();
        $responseParts = array();
        foreach ($rawResponseParts as $rawResponsePart) {
            if (preg_match("/^HTTP/", $rawResponsePart) && count($responseParts) == 0) {
                $rawHeaderParts[] = $rawResponsePart;
            } else {
                $responseParts[] = $rawResponsePart;
            }
        }
        $this->_responseHeaders = implode("\r\n\r\n", $rawHeaderParts);
        $this->_responseBody = implode("\r\n\r\n", $responseParts);
    }

    /**
     * Pull the text of /Error/Message out of an XML error document.
     *
     * @param string $xml The XML document to inspect.
     *
     * @return string|null The message, or null if the XML can't be parsed or
     *                     doesn't contain exactly one /Error/Message node.
     */
    private function _extractS3ErrorMessage($xml)
    {
        // Collect libxml errors internally so malformed XML can't emit
        // warnings while this exception is being constructed.
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadXML($xml);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if (!$loaded) {
            return null;
        }
        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query("/Error/Message");
        if ($nodes && $nodes->length == 1) {
            return $nodes->item(0)->nodeValue;
        }
        return null;
    }

    /**
     * Magic getter for cURL info values.
     *
     * getFooBar() returns the "foo_bar" entry of the captured curl_getinfo()
     * array, or null if there is no such entry.
     *
     * @throws BadMethodCallException If the method name doesn't start with "get".
     */
    public function __call($name, $args)
    {
        if (preg_match("/^get(.*)$/", $name, $matches)) {
            $camelCaseProperty = $matches[1];
            // ContentType -> Content_Type -> content_type
            $property = strtolower(preg_replace("/(\w)([A-Z])/", "\\1_\\2", $camelCaseProperty));
            if (isset($this->_curlInfo[$property])) {
                return $this->_curlInfo[$property];
            }
            return null;
        }
        throw new BadMethodCallException(sprintf("Call to undefined method %s::%s()", get_class($this), $name));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require("getUrl.php"); | |
include("../removeOldFiles/removeOldFiles.php"); | |
// Manual smoke test, part 1: fetch a page with verbose cURL output and a
// 60-second cache. The final argument (0.5) is $tempFreeSpace; its unit is
// defined by removeOldFiles, which is not shown here.
$verboseCurlOptions = array(
    CURLOPT_VERBOSE => true
);
getUrl("http://craiga.id.au/", "GET", array(), $verboseCurlOptions, 60, 0.5);

// Manual smoke test, part 2: request an S3 object and, if the request fails
// with an HTTP error, dump everything the exception exposes.
try {
    getUrl("https://s3-ap-southeast-2.amazonaws.com/craigs-junk-bucket/file-for-getUrl-testing");
} catch (GetUrlHttpErrorException $httpError) {
    $accessors = array(
        "getMessage",
        "getContentType",
        "getUrl",
        "getRawResponse",
        "getResponseBody",
        "getResponseHeaders",
    );
    foreach ($accessors as $accessor) {
        var_dump($httpError->$accessor());
    }
    var_dump($httpError->getResponseHeader("Server"));

    // thing() is not a get* accessor, so the exception's __call handler
    // is expected to reject it.
    try {
        $httpError->thing();
    } catch (BadMethodCallException $unsupportedCall) {
        // expected
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment