Async Parallel HTTP GET requests through Proxy
#!/usr/bin/env python
from multiprocessing.pool import ThreadPool
from time import time as timer

import requests

# Read target URLs, one per line.
with open('domains.txt') as f:
    urls = [line.strip() for line in f]

def fetch_url(url):
    # Route every request through a local proxy listening on port 8080.
    proxies = {
        "http": "http://127.0.0.1:8080",
        "https": "http://127.0.0.1:8080",
    }
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, proxies=proxies, headers=headers, timeout=5)
        return url, response.status_code, None
    except Exception as e:
        return url, None, e

start = timer()
# 100 worker threads; imap_unordered yields each result as soon as it completes.
results = ThreadPool(100).imap_unordered(fetch_url, urls)
for url, status, error in results:
    if error is None:
        print("%r fetched in %ss" % (url, timer() - start))
    else:
        print("error fetching %r: %s" % (url, error))
print("Elapsed Time: %s" % (timer() - start,))