Created
October 18, 2015 16:26
-
-
Save chris-jamieson/78efd4ae1d28350bf049 to your computer and use it in GitHub Desktop.
Random proxy rotation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Suggest buying proxies from http://buyproxies.org/ - I found this to be an excellent service and well-priced too.
// The function below returns a keyed array of proxy info.
// If you want, you can pass a specific ID in (for instance, if you want to use the same proxy for a sequence of calls).
/**
 * Returns the connection details of one proxy from the hard-coded list below.
 *
 * @param int $proxy_id
 *   Key of the proxy to return. Leave at 0 (the default) to have a proxy
 *   picked at random from the list, or pass a specific key to reuse the same
 *   proxy for a sequence of calls. Because 0 is the "pick at random"
 *   sentinel, the entry stored under key 0 is only reachable through random
 *   selection.
 *
 * @return array
 *   Keyed array with 'server', 'port', 'username' and 'password'.
 *
 * @throws RuntimeException
 *   If the proxy list is empty.
 * @throws InvalidArgumentException
 *   If a non-zero $proxy_id is not a key of the proxy list.
 */
function custom_proxy_handler_get_proxy($proxy_id = 0)
{
    $all_proxies = array(
        // below is a dummy / example proxy, add the rest from your list here
        0 => array(
            'server' => '42.108.23.37',
            'port' => '80',
            'username' => 'yourUsername',
            'password' => 'somePassword'
        ),
        // 1 => array()...
    );

    if (empty($all_proxies)) {
        throw new RuntimeException('No proxies are configured.');
    }

    if (empty($proxy_id)) {
        // Pick from the keys that actually exist. The previous
        // rand(1, <stored count>) could never select key 0 and went out of
        // range whenever the stored count disagreed with this list.
        $proxy_id = array_rand($all_proxies);
    } elseif (!isset($all_proxies[$proxy_id])) {
        // Fail loudly instead of returning an array of NULLs that would later
        // produce a broken proxy address and auth header.
        throw new InvalidArgumentException('Unknown proxy ID: ' . $proxy_id);
    }

    $selected = $all_proxies[$proxy_id];

    return array(
        'server' => $selected['server'],
        'port' => $selected['port'],
        'username' => $selected['username'],
        'password' => $selected['password'],
    );
}
// When you want to scrape a page, do something like the following (which uses simple_html_dom as the scraper / parser: http://simplehtmldom.sourceforge.net/).
// Example call of custom_scraper_get_html(), which is defined in this file.
// NOTE: as written, this statement runs (and performs the HTTP request) as soon as this file is loaded.
custom_scraper_get_html('http://target-url.net/this_page.html', TRUE);
/**
 * Fetches a URL with simple_html_dom's file_get_html(), optionally via a proxy.
 *
 * @param string $url
 *   The page to fetch.
 * @param bool $use_proxy
 *   TRUE to route the request through a proxy obtained from
 *   custom_proxy_handler_get_proxy(); FALSE to fetch directly.
 * @param int $proxy_id
 *   Passed straight to custom_proxy_handler_get_proxy(); 0 (the default) lets
 *   that function pick a proxy at random. Ignored when $use_proxy is FALSE.
 *
 * @return mixed
 *   Whatever file_get_html() returns (presumably a simple_html_dom object, or
 *   FALSE on failure - confirm against your version of the library).
 */
function custom_scraper_get_html($url, $use_proxy=FALSE, $proxy_id = 0)
{
    // Only load the library if nothing else has loaded it yet. The path is
    // resolved relative to this file rather than the filesystem root.
    if (!function_exists('file_get_html')) {
        require_once __DIR__ . '/simple_html_dom.php'; // set the path properly for your file location
    }

    if (!$use_proxy) {
        // do it without a proxy
        return file_get_html($url);
    }

    $proxy = custom_proxy_handler_get_proxy($proxy_id);

    // The proxy is authenticated with HTTP Basic credentials.
    $auth = base64_encode($proxy['username'].':'.$proxy['password']);

    // Define a context for HTTP.
    $context = stream_context_create(array(
        'http' => array(
            // The stream wrapper expects a transport URI (tcp://host:port).
            'proxy' => 'tcp://'.$proxy['server'].':'.$proxy['port'],
            // Send the full URI in the request line, as proxies require.
            'request_fulluri' => true,
            'header' => "Proxy-Authorization: Basic $auth",
        ),
    ));

    return file_get_html($url, false, $context);
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment