Last active
July 12, 2025 07:35
-
-
Save palewire/0dded073b8f9aa9202ca2f364e664568 to your computer and use it in GitHub Desktop.
Rotating proxy scraper example
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Rotating proxy scraper example
By Ben Welsh
An example of how to scrape a list of available proxies and use them to make web requests. Helpful when scraping sites that employ measures to restrict access.
import requests
import itertools
from bs4 import BeautifulSoup
Get proxy list from free-proxy-list.net
def get_proxies():
"""
Fetch a list of proxy addresses from the web.

Requests the free-proxy-list.net home page using ``requests``.
"""
# NOTE(review): only the page request is visible in this excerpt; the code
# that parses the response and returns the addresses is not shown here —
# confirm against the full notebook before relying on the return type.
# Fetch the page with the list
r = requests.get('https://free-proxy-list.net/')
proxy_list = get_proxies()
proxy_list
{'103.204.210.112:8080',
'103.240.109.171:53281',
'103.42.253.218:8080',
'103.57.71.109:53281',
'110.77.188.103:62225',
'110.77.239.83:42619',
'111.67.71.238:53281',
'114.134.187.162:53281',
'121.166.157.33:8080',
'121.52.157.23:8080',
'138.186.21.86:53281',
'138.204.142.139:31773',
'139.5.153.86:53281',
'145.249.105.25:8118',
'145.255.28.218:53281',
'160.119.153.206:13093',
'170.84.51.74:53281',
'177.206.131.128:53281',
'177.67.217.14:53281',
'178.176.28.164:8080',
'179.191.87.158:53281',
'181.112.145.222:53281',
'181.112.34.222:53281',
'181.112.46.250:53281',
'181.192.30.222:53281',
'182.253.130.174:53281',
'182.253.37.116:3128',
'186.46.90.50:53281',
'188.126.63.203:41258',
'189.43.88.18:53281',
'190.128.158.54:53281',
'192.141.118.255:53281',
'193.107.247.98:53281',
'200.58.214.114:8080',
'201.166.181.8:53281',
'202.142.164.22:53281',
'213.192.75.138:53281',
'27.255.40.63:8080',
'31.41.89.73:41258',
'36.83.72.178:80',
'37.60.215.133:53281',
'38.123.68.72:8080',
'5.228.166.234:53281',
'5.9.70.215:808',
'62.213.14.166:8080',
'77.85.169.2:8080',
'78.156.49.26:41258',
'78.189.65.220:8080',
'80.254.102.220:3128',
'81.163.50.192:41258',
'81.30.216.147:41258',
'81.95.139.186:53281',
'85.117.77.75:53281',
'89.110.59.227:8080',
'89.255.71.162:53281',
'89.43.38.32:8080',
'91.224.63.218:8080',
'91.230.252.163:3128',
'92.247.93.142:8080',
'95.47.83.56:44331'}
Convert it into a pool that will cycle through the items forever (itertools.cycle repeats them in a fixed order; it does not pick at random)
# itertools.cycle yields the items in the iterable's own iteration order and
# then repeats that same sequence indefinitely; it does not select at random.
proxy_pool = itertools.cycle(proxy_list)
next(proxy_pool)
'89.43.38.32:8080'
next(proxy_pool)
'138.186.21.86:53281'
next(proxy_pool)
'91.230.252.163:3128'
Create a similar pool of user agents
# Browser User-Agent header strings, grouped by browser family.
useragent_list = [
# Chrome
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
# Internet Explorer (these strings carry MSIE / Trident tokens, not Firefox ones)
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)',
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)'
]
useragent_pool = itertools.cycle(useragent_list)
next(useragent_pool)
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
next(useragent_pool)
<itertools.cycle at 0x7f8acc3d01b8>
next(useragent_pool)