Skip to content

Instantly share code, notes, and snippets.

@chris-jamieson
Created October 18, 2015 16:26
Show Gist options
  • Save chris-jamieson/78efd4ae1d28350bf049 to your computer and use it in GitHub Desktop.
Save chris-jamieson/78efd4ae1d28350bf049 to your computer and use it in GitHub Desktop.
Random proxy rotation
<?php
// suggest buying proxies from http://buyproxies.org/ - I found this to be an excellent service and well-priced too.
// This function returns a keyed array of proxy info.
// If you want, you can pass in a specific ID (for instance, if you want to use the same proxy for a sequence of calls).
/**
 * Returns the connection details of one proxy from the hard-coded list.
 *
 * @param int $proxy_id Key of the proxy to return. The default 0 (or any
 *                      other empty value) means "pick one at random".
 *                      Because 0 is the "random" sentinel, a proxy stored
 *                      under key 0 cannot be requested explicitly; key the
 *                      list from 1 if you need to pin a specific proxy.
 *
 * @return array Keyed array with 'server', 'port', 'username' and 'password'.
 *
 * @throws RuntimeException         If the proxy list is empty.
 * @throws InvalidArgumentException If $proxy_id is not a key of the list.
 */
function custom_proxy_handler_get_proxy($proxy_id = 0)
{
    $all_proxies = array(
        // below is a dummy / example proxy, add the rest from your list here
        0 => array(
            'server' => '42.108.23.37',
            'port' => '80',
            'username' => 'yourUsername',
            'password' => 'somePassword'
        ),
        // 1 => array()...
    );

    if (count($all_proxies) === 0) {
        throw new RuntimeException('No proxies are configured.');
    }

    if (empty($proxy_id)) {
        // Choose among the keys that really exist instead of relying on an
        // externally stored count: this works for 0-based, 1-based and sparse
        // lists alike and can never produce an undefined offset.
        $proxy_id = array_rand($all_proxies);
    } elseif (!(is_int($proxy_id) || is_string($proxy_id)) || !isset($all_proxies[$proxy_id])) {
        // Fail loudly rather than returning an array of nulls that would be
        // turned into a bogus ":" proxy address by the caller.
        throw new InvalidArgumentException('Unknown proxy ID: ' . var_export($proxy_id, true));
    }

    $selected = $all_proxies[$proxy_id];

    // Copy only the documented fields so callers get a stable shape.
    return array(
        'server' => $selected['server'],
        'port' => $selected['port'],
        'username' => $selected['username'],
        'password' => $selected['password'],
    );
}
// Example usage: to scrape a page, call custom_scraper_get_html() with the
// target URL; pass true as the second argument to go through a proxy.
// Parsing is done by simple_html_dom (http://simplehtmldom.sourceforge.net/).
// Note: this call runs as soon as the file is loaded and its result is discarded.
custom_scraper_get_html('http://target-url.net/this_page.html', true);
/**
 * Downloads a URL and parses it with simple_html_dom, optionally via a proxy.
 *
 * @param string $url       Address of the page to fetch.
 * @param bool   $use_proxy When TRUE, the request is sent through a proxy
 *                          obtained from custom_proxy_handler_get_proxy()
 *                          and authenticated with HTTP Basic credentials.
 *
 * @return mixed Whatever simple_html_dom's file_get_html() returns.
 */
function custom_scraper_get_html($url, $use_proxy=FALSE)
{
    // Load the parser only when it is not already available, and resolve it
    // relative to this file rather than the filesystem root.
    if (!function_exists('file_get_html')) {
        require_once __DIR__ . '/simple_html_dom.php'; // adjust if the library lives elsewhere
    }

    if (!$use_proxy) {
        // do it without a proxy
        return file_get_html($url);
    }

    $proxy = custom_proxy_handler_get_proxy();

    // The HTTP stream wrapper documents the proxy option as a URI such as
    // tcp://host:port, so add the scheme unless the server already has one.
    $proxy_uri = $proxy['server'] . ':' . $proxy['port'];
    if (strpos($proxy_uri, '://') === false) {
        $proxy_uri = 'tcp://' . $proxy_uri;
    }

    // Credentials are sent using HTTP Basic proxy authentication.
    $auth = base64_encode($proxy['username'] . ':' . $proxy['password']);

    $context = stream_context_create(array(
        'http' => array(
            'proxy' => $proxy_uri,
            // Send the full URI in the request line, as proxies expect.
            'request_fulluri' => true,
            'header' => "Proxy-Authorization: Basic $auth",
        ),
    ));

    return file_get_html($url, false, $context);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment