Skip to content

Instantly share code, notes, and snippets.

@igorzakhar
Last active December 30, 2019 08:10
Show Gist options
  • Save igorzakhar/b8cd119375f960e0d1e7a35115591f9c to your computer and use it in GitHub Desktop.
Save igorzakhar/b8cd119375f960e0d1e7a35115591f9c to your computer and use it in GitHub Desktop.
import argparse
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import random
import sys
import requests
from fake_useragent import UserAgent
def process_args():
    """Parse the command-line options of the script.

    Returns:
        An ``argparse.Namespace`` exposing ``url``, ``urls_count``,
        ``output``, ``proxies``, ``iter_count``, ``thread_count``,
        ``connect_timeout`` and ``read_timeout``.
    """
    # One row per option: (short flag, long flag, default, converter).
    # A converter of None keeps the raw string, which is argparse's default.
    option_table = (
        ('-u', '--url', 'https://www.instagram.com/', None),
        ('-c', '--urls-count', 100, int),
        ('-o', '--output', 'output.csv', None),
        ('-p', '--proxies', 'proxies.txt', None),
        ('-i', '--iter-count', 1, int),
        ('-n', '--thread-count', 1, int),
        ('-t', '--connect-timeout', None, float),
        ('-r', '--read-timeout', None, float),
    )

    cli = argparse.ArgumentParser()
    for short_flag, long_flag, fallback, converter in option_table:
        cli.add_argument(
            short_flag, long_flag, default=fallback, type=converter
        )
    return cli.parse_args()
def get_proxies_list(filepath):
    """Load proxy addresses from a text file, one address per line.

    Surrounding whitespace (including the trailing newline) is removed and
    blank lines are skipped, so no entry can later produce an empty
    ``socks5://`` proxy URL.

    Args:
        filepath: Path of the file holding the proxy addresses.

    Returns:
        A list of non-empty proxy strings (empty when the file holds no
        usable entry), or ``None`` when the file cannot be opened or read.
        In the latter case the error is logged.
    """
    try:
        with open(filepath) as fp:
            # Iterate the file lazily; keep only lines with real content.
            return [entry for entry in map(str.strip, fp) if entry]
    except OSError as err:
        # One-line error message, deliberately without a traceback.
        logging.error(f'{err.strerror}: {err.filename}')
        return None
def get_cookie_values(url, proxies_list, user_agent, to=None):
    """Request *url* through a randomly chosen SOCKS5 proxy and read cookies.

    Args:
        url: Address to request.
        proxies_list: Sequence of proxy addresses; one is picked at random
            per call and stripped of surrounding whitespace.
        user_agent: Object whose ``random`` attribute supplies the value of
            the ``User-Agent`` header.
        to: Timeout forwarded to ``requests.get`` (``None`` or, as used by
            ``main``, a ``(connect, read)`` tuple).

    Returns:
        A ``(csrftoken, rur, mid)`` tuple taken from the response cookies
        (an item is ``None`` when that cookie is absent), or ``None`` when
        no proxy is available or the request fails.
    """
    # random.choice() raises IndexError on an empty sequence; treat "no
    # proxy" like any other failed attempt instead of crashing the worker.
    if not proxies_list:
        logging.warning('No proxies available, request skipped')
        return None

    headers = {'User-Agent': user_agent.random}
    proxy_url = f"socks5://{random.choice(proxies_list).strip()}"
    # Map both schemes so that an http:// URL does not bypass the proxy.
    proxy = {'http': proxy_url, 'https': proxy_url}

    try:
        resp = requests.get(url, headers=headers, proxies=proxy, timeout=to)
    except requests.exceptions.RequestException as err:
        logging.debug(err)
        return None

    csrftoken = resp.cookies.get('csrftoken')
    rur = resp.cookies.get('rur')
    mid = resp.cookies.get('mid')
    logging.info(f'csrftoken: {csrftoken} rur: {rur} mid: {mid}')
    return csrftoken, rur, mid
def save_results(results, filename):
    """Append the cookie values produced by finished tasks to a file.

    Every usable result is written as one ``csrftoken;rur;mid`` line.
    Results that are falsy (a failed request) or whose three values are
    all ``None`` are skipped.  A task that raised is logged and skipped so
    that one failing worker does not discard the remaining results.

    Args:
        results: Iterable of ``concurrent.futures.Future`` objects whose
            result is either ``None`` or a ``(csrftoken, rur, mid)`` tuple.
        filename: Path of the output file, opened in append mode.

    Raises:
        OSError: If the output file cannot be opened or written; the error
            is logged before being re-raised.
    """
    try:
        with open(filename, 'a') as fp:
            for future in as_completed(results):
                try:
                    # result() re-raises whatever the worker raised, so
                    # fetch it exactly once and isolate the failure here.
                    values = future.result()
                except Exception as err:
                    logging.error(f'Task failed: {err!r}')
                    continue
                if not values:
                    continue
                csrftoken, rur, mid = values
                if csrftoken is None and rur is None and mid is None:
                    continue
                fp.write(f'{csrftoken};{rur};{mid}\n')
    except OSError as err:
        # One-line error message, deliberately without a traceback.
        logging.error(f'{err.strerror}: {err.filename}')
        raise
def main():
    """Run the cookie collection configured on the command line.

    For each of ``--iter-count`` rounds, ``--urls-count`` requests to
    ``--url`` are submitted to a pool of ``--thread-count`` threads and the
    cookie values they return are appended to the ``--output`` file.

    Returns early (after logging) when the proxies file cannot be read or
    contains no usable entry.

    Raises:
        OSError: Propagated from ``save_results`` when the output file
            cannot be written.
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    logging.getLogger('urllib3').setLevel(logging.WARNING)

    args = process_args()

    proxies = get_proxies_list(args.proxies)
    if proxies is None:
        # The read error has already been logged by get_proxies_list.
        return
    # Without at least one non-blank entry every worker would fail when
    # picking a proxy, so stop early with a clear message.
    proxies = [entry for entry in proxies if entry.strip()]
    if not proxies:
        logging.error(f'No proxies found in {args.proxies}')
        return

    timeout = (args.connect_timeout, args.read_timeout)
    user_agent = UserAgent()

    for _round in range(args.iter_count):
        with ThreadPoolExecutor(max_workers=args.thread_count) as executor:
            futures = [
                executor.submit(
                    get_cookie_values,
                    args.url,
                    proxies,
                    user_agent,
                    to=timeout,
                )
                for _request in range(args.urls_count)
            ]
            # Consume the results while the pool is still open.
            save_results(futures, args.output)
if __name__ == '__main__':
    try:
        main()
    except OSError:
        # save_results logs file errors before re-raising them.  Exit with
        # a non-zero status so the caller can detect the failure; a bare
        # sys.exit() would report success (status 0).
        sys.exit(1)
fake-useragent==0.1.11
PySocks==1.7.1
requests==2.22.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment