Created
July 3, 2016 18:20
-
-
Save rafasashi/ec8fa63155c1990e81f889cc3a053a0c to your computer and use it in GitHub Desktop.
Implementation of curl_multi_exec for parallel processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Drive a cURL multi handle, repeating the call for as long as libcurl
 * answers CURLM_CALL_MULTI_PERFORM ("call me again right away").
 *
 * @param resource|CurlMultiHandle $mh      Multi handle from curl_multi_init().
 * @param int|null                 $running Set by reference to the number of
 *                                          transfers that are still active.
 * @return int The first status code that is not CURLM_CALL_MULTI_PERFORM.
 */
function custom_curl_multi_exec($mh, &$running){
    // Prime the status with one call, then keep going only while asked to.
    $status = curl_multi_exec($mh, $running);
    while ($status === CURLM_CALL_MULTI_PERFORM) {
        $status = curl_multi_exec($mh, $running);
    }
    return $status;
}
/**
 * Run several HTTP requests in parallel through the cURL multi interface.
 *
 * @param array $data           Map of request id => request. A request is either
 *                              a URL string or an array with a 'url' key; the
 *                              whole entry is also handed to get_curl_options().
 *                              Entries without a non-empty URL are skipped.
 * @param array $extra_options  CURLOPT_* => value pairs applied to every handle
 *                              after the defaults, so they override them.
 * @param bool  $headers_only   When true the result per id is curl_getinfo()
 *                              data instead of the response body.
 * @param bool  $http_code_only Only honoured together with $headers_only: the
 *                              result per id is then just the HTTP status code.
 * @return array Map of request id => body string, info array or status code.
 */
function multi_curl($data, $extra_options=array(), $headers_only=false, $http_code_only=false){
    $multi_curl = $results = array();
    $mh = curl_multi_init();

    // Referencing an undefined constant is a fatal Error on PHP 8. Pass '' in
    // that case so get_curl_options() applies its own user-agent fallbacks.
    $user_agent = defined('USER_AGENT') ? USER_AGENT : '';
    $timeout = 5;

    foreach ($data as $id => $args) {
        //----------------------------get url---------------------
        $url = '';
        if (is_array($args)) {
            if (isset($args['url'])) {
                $url = $args['url'];
            }
        }
        else {
            $url = $args;
        }
        if ($url == '') {
            continue;
        }

        //----------------------init curl------------------------
        $handle = curl_init($url);
        if ($handle === false) {
            // curl_init() can fail; skip the entry rather than configuring false.
            continue;
        }

        //----------------------set default curl options------------------------
        curl_setopt_array($handle, get_curl_options($user_agent, $args, $timeout, $headers_only));

        //----------------------set curl multi options------------------------
        // Store the request id as private data inside the cURL easy handle
        curl_setopt($handle, CURLOPT_PRIVATE, $id);

        //----------------------set extra curl options------------------------
        if (!empty($extra_options)) {
            curl_setopt_array($handle, $extra_options);
        }

        //-------------------------set curl handler--------------------------
        curl_multi_add_handle($mh, $handle);
        $multi_curl[$id] = $handle;
    }

    //---------------------------run curl multi request-----------------------
    $running = null;
    $has_helper = function_exists('custom_curl_multi_exec');
    do {
        $mrc = $has_helper
            ? custom_curl_multi_exec($mh, $running)
            : curl_multi_exec($mh, $running);

        // Block until there is socket activity instead of spinning. A return
        // of -1 means the select failed; sleep briefly so the retry does not
        // turn into a 100% CPU busy loop.
        if ($mrc === CURLM_OK && $running > 0 && curl_multi_select($mh) === -1) {
            usleep(100);
        }
    }
    // Stop once nothing is running or libcurl reports a real error;
    // CURLM_CALL_MULTI_PERFORM only means "call exec again immediately".
    while ($running > 0 && ($mrc === CURLM_OK || $mrc === CURLM_CALL_MULTI_PERFORM));

    //---------------------------handle results-----------------------
    foreach ($multi_curl as $id => $c) {
        if ($headers_only === true) {
            $results[$id] = ($http_code_only === true)
                ? curl_getinfo($c, CURLINFO_HTTP_CODE)
                : curl_getinfo($c);
        }
        else {
            $results[$id] = curl_multi_getcontent($c);
        }
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);

    return $results;
}
/**
 * Build the CURLOPT_* option array for one request.
 *
 * @param string       $user_agent  User agent to send. When '' it is resolved,
 *                                  in order, from $args['user-agent'], the
 *                                  USER_AGENT constant, the HTTP_USER_AGENT
 *                                  constant and $_SERVER['HTTP_USER_AGENT'].
 * @param array|string $args        Request description. Recognised array keys:
 *                                  'user-agent', 'headers' (name => value map,
 *                                  or a list of complete header lines) and
 *                                  'post' (value for CURLOPT_POSTFIELDS).
 * @param int          $timeout     Timeout in seconds.
 * @param bool         $header_only When true, return the reduced option set
 *                                  used for header/info-only lookups.
 * @param string       $ref_url     Referer sent with full requests.
 * @return array CURLOPT_* => value pairs suitable for curl_setopt_array().
 */
function get_curl_options($user_agent='', $args=array(), $timeout=GET_CONTENT_TIMEOUT, $header_only=false, $ref_url=CUR_URL){
    //--------------------------get user agent----------------------
    if ($user_agent == '') {
        if (isset($args['user-agent'])) {
            $user_agent = $args['user-agent'];
        }
        elseif (defined('USER_AGENT')) {
            $user_agent = USER_AGENT;
        }
        elseif (defined('HTTP_USER_AGENT')) {
            // Guarded: this constant is not defined in this file, and reading
            // an undefined constant is a fatal Error on PHP 8.
            $user_agent = HTTP_USER_AGENT;
        }
        elseif (isset($_SERVER['HTTP_USER_AGENT'])) {
            // Last resort: reuse the user agent of the current visitor.
            $user_agent = $_SERVER['HTTP_USER_AGENT'];
        }
    }

    $curl_options = array();

    if ($header_only === true) {
        //--------------------get headers only-----------------
        // NOTE(review): CURLOPT_NOBODY is not set, so the body is still
        // downloaded; the caller is expected to read curl_getinfo() only.
        $curl_options[CURLOPT_RETURNTRANSFER] = true;        // return the transfer instead of printing it
        $curl_options[CURLOPT_HEADER]         = false;       // do not include headers in the output
        $curl_options[CURLOPT_FOLLOWLOCATION] = true;        // follow redirects
        $curl_options[CURLOPT_USERAGENT]      = $user_agent; // who am i
        $curl_options[CURLOPT_AUTOREFERER]    = true;        // set referer on redirect
        $curl_options[CURLOPT_CONNECTTIMEOUT] = $timeout;    // timeout on connect (in seconds)
        $curl_options[CURLOPT_TIMEOUT]        = $timeout;    // timeout on response (in seconds)
        $curl_options[CURLOPT_MAXREDIRS]      = 10;          // stop after 10 redirects
        // SECURITY: TLS peer/host verification is deliberately disabled here.
        $curl_options[CURLOPT_SSL_VERIFYPEER] = false;
        $curl_options[CURLOPT_SSL_VERIFYHOST] = false;

        return $curl_options;
    }

    //-------------------build custom headers--------------
    $headers = array();
    // The two halves are joined with a comma; without it the media types
    // fuse into the invalid token "application/jsontext/html".
    $headers[] = "Accept: text/xml,application/xml,application/xhtml+xml,application/json"
               . ",text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
    $headers[] = "Cache-Control: max-age=0";
    $headers[] = "Connection: keep-alive";
    $headers[] = "Keep-Alive: 2";
    $headers[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
    $headers[] = "Accept-Language: en-us,en;q=0.5";

    // Note that if you want to use a proxy and use it as a _cache_, you'll have to do "Pragma: "
    // else by default Curl puts a "Pragma: no-cache" header and force cache misses for all requests.
    //$headers[]="Pragma: ";

    if (is_array($args) && !empty($args['headers'])) {
        foreach ($args['headers'] as $header_key => $header_value) {
            // Integer keys mean the value already is a complete header line.
            $headers[] = is_int($header_key)
                ? $header_value
                : $header_key.': '.$header_value;
        }
    }

    //----------------------------set curl options-------------------------------
    $curl_options[CURLOPT_ENCODING]       = "";          // accept every encoding cURL supports
    // SECURITY: TLS peer/host verification is deliberately disabled here.
    $curl_options[CURLOPT_SSL_VERIFYPEER] = FALSE;
    $curl_options[CURLOPT_SSL_VERIFYHOST] = FALSE;
    $curl_options[CURLOPT_USERAGENT]      = $user_agent;
    $curl_options[CURLOPT_TIMEOUT]        = $timeout;
    $curl_options[CURLOPT_FAILONERROR]    = true;        // treat HTTP status >= 400 as a failure
    $curl_options[CURLOPT_FOLLOWLOCATION] = true;
    $curl_options[CURLOPT_RETURNTRANSFER] = true;
    $curl_options[CURLOPT_NOBODY]         = 0;
    $curl_options[CURLOPT_HEADER]         = 0;
    $curl_options[CURLOPT_REFERER]        = $ref_url;
    $curl_options[CURLOPT_AUTOREFERER]    = true;
    $curl_options[CURLOPT_HTTPHEADER]     = $headers;

    //------------------------pass post arguments--------------------------
    if (is_array($args) && !empty($args['post'])) {
        $curl_options[CURLOPT_POST]          = true;
        $curl_options[CURLOPT_CUSTOMREQUEST] = 'POST';
        $curl_options[CURLOPT_POSTFIELDS]    = $args['post'];
    }

    return $curl_options;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example: fetch three pages in parallel, keyed by a label of our choosing.
$multi_request = [
    'google'  => ['url' => 'http://google.com'],
    'yahoo'   => ['url' => 'http://yahoo.com'],
    'twitter' => ['url' => 'http://twitter.com'],
];

// $cur_results holds one response body per label.
$cur_results = multi_curl($multi_request);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment