Skip to content

Instantly share code, notes, and snippets.

@rafasashi
Created July 3, 2016 18:20
Show Gist options
  • Save rafasashi/ec8fa63155c1990e81f889cc3a053a0c to your computer and use it in GitHub Desktop.
Save rafasashi/ec8fa63155c1990e81f889cc3a053a0c to your computer and use it in GitHub Desktop.
Implementation of curl_multi_exec for parallel processing
/**
 * Drive a cURL multi handle until it stops asking to be called again.
 *
 * Calls curl_multi_exec() once, then keeps calling it for as long as the
 * returned status is CURLM_CALL_MULTI_PERFORM.
 *
 * @param mixed $mh       cURL multi handle, as returned by curl_multi_init().
 * @param int   &$running Updated by curl_multi_exec() on every call.
 * @return int The first status code that is not CURLM_CALL_MULTI_PERFORM.
 */
function custom_curl_multi_exec($mh, &$running){
    $status = curl_multi_exec($mh, $running);
    while ($status === CURLM_CALL_MULTI_PERFORM) {
        $status = curl_multi_exec($mh, $running);
    }
    return $status;
}
/**
 * Run several HTTP requests in parallel through a cURL multi handle.
 *
 * @param array $data            Map of request id => request. A request is either a
 *                               URL string or an array with a 'url' key; the whole
 *                               entry is also handed to get_curl_options(). Entries
 *                               with an empty URL are skipped.
 * @param array $extra_options   Extra CURLOPT_* => value pairs applied to every
 *                               handle after the defaults (so they win on conflict).
 * @param bool  $headers_only    When true, results come from curl_getinfo() instead
 *                               of the response body.
 * @param bool  $http_code_only  Only consulted when $headers_only is true: return
 *                               just CURLINFO_HTTP_CODE instead of the full info array.
 * @return array Map of request id => body string, info array or HTTP code, for
 *               every request for which a cURL handle could be created.
 */
function multi_curl($data, $extra_options=array(), $headers_only=false, $http_code_only=false){
    $multi_curl = $results = array();
    $mh = curl_multi_init();

    // Referencing an undefined constant throws on PHP 8; passing '' instead lets
    // get_curl_options() apply its own user-agent fallback chain.
    $default_user_agent = defined('USER_AGENT') ? USER_AGENT : '';
    $timeout = 5;

    foreach ($data as $id => $args){
        //----------------------------get url---------------------
        $url = '';
        if(is_array($args)){
            if(isset($args['url'])){
                $url = $args['url'];
            }
        }
        else{
            $url = $args;
        }
        if($url == ''){
            continue;
        }

        //----------------------init curl------------------------
        $handle = curl_init($url);
        if($handle === false){
            // No handle could be created: nothing to configure or collect.
            continue;
        }
        $multi_curl[$id] = $handle;

        //----------------------set default curl options------------------------
        curl_setopt_array($handle, get_curl_options($default_user_agent, $args, $timeout, $headers_only));

        //----------------------set curl multi options------------------------
        // Store the request id as private data inside the cURL easy handle.
        $multi_options = array(CURLOPT_PRIVATE => $id);
        curl_setopt_array($handle, $multi_options);

        //----------------------set extra curl options------------------------
        if(!empty($extra_options)) {
            curl_setopt_array($handle, $extra_options);
        }

        //-------------------------set curl handler--------------------------
        curl_multi_add_handle($mh, $handle);
    }

    //---------------------------run curl multi request-----------------------
    $running = null;
    $use_helper = function_exists('custom_curl_multi_exec');
    do{
        // Advance all transfers and fetch the new state.
        if($use_helper){
            $mrc = custom_curl_multi_exec($mh, $running);
        }
        else{
            $mrc = curl_multi_exec($mh, $running);
        }
        // Only block while transfers are still pending and the stack is healthy.
        if($running > 0 && $mrc == CURLM_OK){
            if(curl_multi_select($mh) == -1){
                // select() could not wait on any descriptor; back off briefly
                // instead of spinning at full CPU.
                usleep(100);
            }
        }
    }
    while($running > 0 && ($mrc == CURLM_OK || $mrc == CURLM_CALL_MULTI_PERFORM));

    //---------------------------handle results-----------------------
    foreach($multi_curl as $id => $c) {
        if($headers_only===true){
            if($http_code_only===true){
                $results[$id] = curl_getinfo($c, CURLINFO_HTTP_CODE);
            }
            else{
                $results[$id] = curl_getinfo($c);
            }
        }
        else{
            $results[$id] = curl_multi_getcontent($c);
        }
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);
    return $results;
}
/**
 * Build the CURLOPT_* option array for one request.
 *
 * @param string       $user_agent  User agent to send. When empty, falls back to
 *                                  $args['user-agent'], then the USER_AGENT constant,
 *                                  then the HTTP_USER_AGENT constant (each only if
 *                                  available); otherwise it stays empty.
 * @param array|string $args        Request description. When it is an array, the
 *                                  optional keys 'user-agent', 'headers'
 *                                  (name => value map) and 'post' (POST payload)
 *                                  are read.
 * @param int          $timeout     Timeout in seconds.
 * @param bool         $header_only Selects the reduced option set used when the
 *                                  caller only inspects curl_getinfo().
 * @param string       $ref_url     Value for CURLOPT_REFERER (full option set only).
 * @return array CURLOPT_* => value map suitable for curl_setopt_array().
 */
function get_curl_options($user_agent='', $args=array(), $timeout=GET_CONTENT_TIMEOUT, $header_only=false, $ref_url=CUR_URL){
    //--------------------------get user agent----------------------
    if($user_agent==''){
        if(isset($args['user-agent'])){
            $user_agent=$args['user-agent'];
        }
        elseif(defined('USER_AGENT')){
            $user_agent=USER_AGENT;
        }
        elseif(defined('HTTP_USER_AGENT')){
            // Guarded: an undefined constant throws on PHP 8.
            $user_agent=HTTP_USER_AGENT;
        }
    }

    // NOTE(review): both branches disable TLS peer and host verification, which
    // exposes requests to man-in-the-middle attacks. Kept because callers may
    // rely on it; confirm whether it is really intended.
    $curl_options = array();
    if($header_only===true){
        //--------------------get headers only-----------------
        // NOTE(review): CURLOPT_NOBODY is not set here, so the body is still
        // transferred even though the caller only reads curl_getinfo().
        $curl_options[CURLOPT_RETURNTRANSFER] = true;     // return the transfer instead of printing it
        $curl_options[CURLOPT_HEADER]         = false;    // do not include headers in the output
        $curl_options[CURLOPT_FOLLOWLOCATION] = true;     // follow redirects
        $curl_options[CURLOPT_USERAGENT]      = $user_agent;
        $curl_options[CURLOPT_AUTOREFERER]    = true;     // set referer on redirect
        $curl_options[CURLOPT_CONNECTTIMEOUT] = $timeout; // timeout on connect (in seconds)
        $curl_options[CURLOPT_TIMEOUT]        = $timeout; // timeout on response (in seconds)
        $curl_options[CURLOPT_MAXREDIRS]      = 10;       // stop after 10 redirects
        $curl_options[CURLOPT_SSL_VERIFYPEER] = false;
        $curl_options[CURLOPT_SSL_VERIFYHOST] = false;
    }
    else{
        //-------------------build custom headers--------------
        $headers=array();
        // The two halves must be joined by a comma; without it the header
        // contained the invalid media type "application/jsontext/html".
        $headers[0]="Accept: text/xml,application/xml,application/xhtml+xml,application/json,";
        $headers[0].="text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
        $headers[]="Cache-Control: max-age=0";
        $headers[]="Connection: keep-alive";
        $headers[]="Keep-Alive: 2";
        $headers[]="Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
        $headers[]="Accept-Language: en-us,en;q=0.5";
        // Note that if you want to use a proxy and use it as a _cache_, you'll have to do "Pragma: "
        // else by default Curl puts a "Pragma: no-cache" header and force cache misses for all requests.
        //$headers[]="Pragma: ";
        if(is_array($args) && !empty($args['headers']) && is_array($args['headers'])) {
            foreach($args['headers'] as $header_key => $header_value){
                $headers[]=$header_key.': '.$header_value;
            }
        }

        //----------------------------set curl options-------------------------------
        $curl_options[CURLOPT_ENCODING]       = ""; // empty string: accept every encoding cURL supports
        $curl_options[CURLOPT_SSL_VERIFYPEER] = FALSE;
        $curl_options[CURLOPT_SSL_VERIFYHOST] = FALSE;
        $curl_options[CURLOPT_USERAGENT]      = $user_agent;
        $curl_options[CURLOPT_TIMEOUT]        = $timeout;
        $curl_options[CURLOPT_FAILONERROR]    = true;
        $curl_options[CURLOPT_FOLLOWLOCATION] = true;
        $curl_options[CURLOPT_RETURNTRANSFER] = true;
        $curl_options[CURLOPT_NOBODY]         = 0;
        $curl_options[CURLOPT_HEADER]         = 0;
        $curl_options[CURLOPT_REFERER]        = $ref_url;
        $curl_options[CURLOPT_AUTOREFERER]    = true;
        $curl_options[CURLOPT_HTTPHEADER]     = $headers;
        //$curl_options[CURLOPT_ENCODING]= 'gzip,deflate';

        //------------------------pass post arguments--------------------------
        if(is_array($args) && !empty($args['post'])) {
            $curl_options[CURLOPT_POST]          = true;
            $curl_options[CURLOPT_CUSTOMREQUEST] = 'POST';
            $curl_options[CURLOPT_POSTFIELDS]    = $args['post'];
        }
    }
    return $curl_options;
}
// Example usage: fetch three sites in parallel, keyed by a short request id.
$multi_request = [
    'google'  => ['url' => 'http://google.com'],
    'yahoo'   => ['url' => 'http://yahoo.com'],
    'twitter' => ['url' => 'http://twitter.com'],
];
$cur_results = multi_curl($multi_request);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment