Last active
January 24, 2021 14:33
-
-
Save guruguruman/1e424ef0a7e8d9e0113a2c9c0b9dfa9e to your computer and use it in GitHub Desktop.
Rotate publicly listed proxies on each request in PHP.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Provides proxies taken from a public proxy listing and rotates them
 * as the client keeps requesting.
 *
 * Each proxy is handed out at most `$limitUsageCount` times in a row;
 * after that (or after `discardCurrent()`), the next proxy of the list is used.
 * When the list runs out it is downloaded again from `$sourceUrl`.
 */
class ScrapingProxyProvider
{
    // Proxies still available, each one an array with 'ip' and 'port' keys.
    private $proxyDatas = array();

    // Proxy currently handed out, or null when none is selected.
    private $proxyData = null;

    // Maximum number of times the client can use one proxy before rotating.
    public $limitUsageCount = 5;

    // Location of the plain-text proxy list (one "ip:port" entry per line).
    // Defaults to 'https://proxy.l337.tech/txt', with big thanks.
    public $sourceUrl = "https://proxy.l337.tech/txt";

    // How many times the current proxy has been handed out so far.
    private $usedCount = 0;

    /**
     * Download the proxy list from `$sourceUrl` and parse it.
     *
     * Lines are trimmed first so a CRLF-terminated list does not leave a
     * stray "\r" in the port. Lines that are not exactly "ip:port" with both
     * parts non-empty are skipped.
     *
     * @return array List of array('ip' => string, 'port' => string); empty
     *               when the download fails or no valid entry is found.
     */
    private function getRemoteProxies()
    {
        $content = file_get_contents($this->sourceUrl);

        // file_get_contents() returns false on failure (and emits a warning);
        // treat that as "no proxies" rather than parsing a boolean.
        if ($content === false) {
            return array();
        }

        $proxyDatas = array();
        foreach (explode("\n", $content) as $rawLine) {
            $parts = explode(":", trim($rawLine));
            if (count($parts) != 2 || $parts[0] === "" || $parts[1] === "") {
                continue;
            }
            $proxyDatas[] = array(
                "ip" => $parts[0],
                "port" => $parts[1],
            );
        }

        return $proxyDatas;
    }

    /**
     * Discard the proxy which is currently used, so that the next call to
     * `fetchProxyData()` is forced to rotate to another proxy.
     */
    public function discardCurrent()
    {
        $this->proxyData = null;
    }

    /**
     * Return the proxy to use for the next request, rotating if needed.
     *
     * A rotation happens when no proxy is currently selected or when the
     * current one has already been handed out `$limitUsageCount` times.
     * The list is downloaded again only when a rotation is needed and no
     * proxy is left in stock.
     *
     * @return array|null array('ip' => string, 'port' => string), or null
     *                    when no proxy could be obtained.
     */
    public function fetchProxyData()
    {
        $mustRotate = !$this->proxyData
            || $this->limitUsageCount <= $this->usedCount;

        if ($mustRotate) {
            // Refill the stock only when we actually need a new proxy.
            if (count($this->proxyDatas) == 0) {
                $this->proxyDatas = $this->getRemoteProxies();
            }

            $this->usedCount = 0;
            // array_shift() yields null when the list is still empty.
            $this->proxyData = array_shift($this->proxyDatas);

            if ($this->proxyData === null) {
                return null;
            }
        }

        $this->usedCount++;
        return $this->proxyData;
    }
}
/**
 * Usage example: send a series of requests through rotating proxies.
 * When a proxy appears to be banned (non-200 response), call 'discardCurrent'
 * to force a rotation on the next request.
 */
$proxyProvider = new ScrapingProxyProvider();
$proxyProvider->limitUsageCount = 5;
$url = "https://example.com";
$requestCount = 30;
for ($i = 0; $i < $requestCount; $i++) {
    $proxyData = $proxyProvider->fetchProxyData();

    // The provider hands back null when it has no proxy to offer; without
    // this guard we would build a bogus ":" proxy string and keep looping.
    if (!$proxyData) {
        print("No proxy available, stop requesting." . PHP_EOL);
        break;
    }

    $proxy = "{$proxyData["ip"]}:{$proxyData["port"]}";
    $ch = curl_init($url);
    curl_setopt_array($ch, array(
        CURLOPT_PROXY => $proxy,
        CURLOPT_HEADER => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        // Public proxies are frequently dead; bound the wait so one bad
        // proxy cannot stall the whole loop.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ));
    $content = curl_exec($ch);
    // Status is 0 when the transfer failed before any HTTP response arrived,
    // which is handled below like any other non-200 result.
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($status != 200) {
        print(" Used proxy could be banned, discard current proxy to get next one." . PHP_EOL);
        $proxyProvider->discardCurrent();
    }
    print("{$proxy} => ". $status. PHP_EOL);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment