Skip to content

Instantly share code, notes, and snippets.

@sparrowu93
Last active April 11, 2017 05:26
Show Gist options
  • Save sparrowu93/4e87b0a9f88432dc90375bbd0d500e8c to your computer and use it in GitHub Desktop.
Save sparrowu93/4e87b0a9f88432dc90375bbd0d500e8c to your computer and use it in GitHub Desktop.
python large file download
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path
import requests
from random import randint
# Base timeout for HTTP requests; presumably seconds (the unit requests
# uses for its ``timeout`` argument) -- TODO confirm.
base_timeout = 10
# HTTP headers passed to requests.get() in download_files. The User-Agent
# value is a placeholder and should be replaced with a real UA string.
headers = {'User-Agent': '-UserAgentStrings-'}
# Optional alternative: generate a browser-like User-Agent with the
# third-party fake-useragent package instead of hard-coding one.
#   pip install fake-useragent
#   from fake_useragent import UserAgent
#   ua = UserAgent()
#   headers = {'User-Agent': ua.chrome}
from hashlib import md5
def md5_encrypt(data):
    """Return the hexadecimal MD5 digest of *data*.

    Despite the name this is a one-way hash, not encryption.

    Args:
        data: Text, which is encoded as UTF-8 before hashing, or a
            bytes-like object, which is hashed as-is.

    Returns:
        The 32-character lowercase hexadecimal digest as a ``str``.
    """
    # Only text needs encoding; bytes-like input has no usable .encode().
    if isinstance(data, str):
        data = data.encode('utf-8')
    return md5(data).hexdigest()
def read_and_write(data):
    """Append *data* to ``file.csv`` and overwrite ``hash.txt`` with it.

    Both files are resolved relative to the current working directory and
    are created if they do not exist. No separator or newline is added;
    *data* is written exactly as it formats with ``'%s'``.

    Args:
        data: Value to record, typically a ``str``.
    """
    text = '%s' % (data)

    # Append mode creates file.csv on first use, so no prior read is needed;
    # the previous contents were never consulted anyway.
    with open('file.csv', 'a') as csv_out:
        csv_out.write(text)

    # hash.txt always holds only the most recently written value.
    with open('hash.txt', 'w') as hash_out:
        hash_out.write(text)
def download_files(url, file_dir, file_name):
    """Stream the resource at *url* into ``file_dir/file_name``.

    The body is fetched with ``stream=True`` and written in 1 KiB chunks so
    a large file is never held in memory in full.

    Args:
        url: Address to fetch with an HTTP GET.
        file_dir: Destination directory; created (including parents) when
            it does not exist. A trailing path separator is optional.
        file_name: Name of the file to write inside *file_dir*.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
        OSError: If the directory or file cannot be created or written.
    """
    response = requests.get(
        url, stream=True, headers=headers, timeout=base_timeout * 2)
    try:
        # Fail loudly instead of saving an error page as the download.
        response.raise_for_status()

        target_dir = Path(file_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        # Joining via Path puts the file inside file_dir whether or not
        # the caller supplied a trailing separator.
        file_path = target_dir / file_name

        with open(str(file_path), 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks, which arrive empty
                    out_file.write(chunk)
    finally:
        # Release the pooled connection even if writing fails part-way.
        response.close()
import threading
class downloadWorker(threading.Thread):
    """Thread skeleton named ``image_worker`` that holds a batch of images.

    ``run`` currently only prints start/exit markers; ``images`` is stored
    on the instance but not yet consumed -- presumably the download call
    is meant to go between the two markers.
    """

    def __init__(self, images):
        """Create the worker.

        Args:
            images: Work items kept on ``self.images`` for ``run`` to use.
        """
        super().__init__(name="image_worker")
        self.images = images

    def run(self):
        """Thread body: print a start marker, do the work, print an exit marker."""
        print("start:" + self.name)
        # TODO: perform the actual work here, e.g. func(args)
        print("exit:" + self.name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment