Created
May 26, 2011 06:42
-
-
Save larryv/992662 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.2 | |
import csv | |
import datetime | |
import glob | |
import io | |
import os | |
import os.path | |
import sys | |
import tempfile | |
import time | |
import threading | |
from urllib.parse import unquote_plus, urlparse | |
import urllib.request | |
import zipfile | |
class DownloadWebRes(threading.Thread):
    """Background thread that copies a readable stream into a writable one.

    The payload is accumulated in ``data`` while ``remote`` is being read
    and is handed to ``local.write`` in a single call once ``remote`` is
    exhausted.  ``progress`` may be polled from another thread while the
    transfer is running.
    """

    def __init__(self, remote, local):
        """Remember the two endpoints; nothing is read until the thread runs.

        remote -- object whose ``read(n)`` returns bytes (falsy at the end)
        local  -- object whose ``write(data)`` accepts a bytes-like object
        """
        super().__init__()
        self.remote = remote
        self.local = local
        # A bytearray grows in place.  Re-binding an immutable bytes object
        # with ``+=`` would copy everything received so far for each chunk.
        self.data = bytearray()
        # Value returned by ``local.write``; stays 0 until the write happens.
        self.written = 0

    @property
    def progress(self):
        """Return the number of bytes received from ``remote`` so far."""
        return len(self.data)

    def run(self):
        """Drain ``remote`` in buffer-sized chunks, then write to ``local``."""
        while True:
            chunk = self.remote.read(io.DEFAULT_BUFFER_SIZE)
            if not chunk:
                break
            self.data.extend(chunk)
        self.written = self.local.write(self.data)
def download_web_res(url, dest):
    """Download ``url`` into the file ``dest`` and print a ten-dot progress bar.

    url  -- address passed to ``urllib.request.urlopen``
    dest -- path of the local file, opened in ``'w+b'`` mode (truncated)

    The transfer runs in a ``DownloadWebRes`` thread while this function
    polls its ``progress`` every 0.1 s.  One dot is printed for every tenth
    of the size announced in the Content-Length header.  When that header is
    missing or zero, no incremental dots can be computed and all ten are
    printed once the transfer ends.
    """
    # Set up remote location
    fremote = urllib.request.urlopen(url)
    try:
        length = fremote.getheader('Content-length')
        # The header is optional (e.g. chunked responses); 0 means "unknown".
        size = int(length) if length is not None else 0
        print('Downloading {} bytes from "{}"'.format(
            size if length is not None else 'an unknown number of', url))

        # Download!
        with open(dest, 'w+b') as flocal:
            decile = 0
            dl_thread = DownloadWebRes(fremote, flocal)
            dl_thread.start()
            while dl_thread.is_alive():
                if size > 0:
                    # Integer arithmetic avoids float rounding; the clamp
                    # guards against a server sending more than it announced.
                    reached = min(10, dl_thread.progress * 10 // size)
                    print('.' * (reached - decile), end='')
                    sys.stdout.flush()
                    decile = reached
                time.sleep(0.1)
            # Make sure the worker has fully finished before the file closes.
            dl_thread.join()
            # Complete the bar and terminate the line.
            print('.' * (10 - decile))

        # Wrap up
        print('Wrote {} bytes to "{}"'.format(dl_thread.written, dest))
    finally:
        # Release the connection even if opening or writing the file failed.
        fremote.close()
def go():
    """Fetch every data file listed under ``update_data_sources``.

    Each file in the ``update_data_sources`` directory is read as a CSV with
    a header row; its base name (extension removed) is used as the agency
    name.  For every row, the columns ``name``, ``dest`` and ``url`` are
    read.  The resource at ``url`` is saved below
    ``data/<today>/<agency>/<dest>``.  A ``.zip`` download is stored next to
    that directory and then extracted into it.
    """
    # Read in data
    sources = {}
    # The original called a bare ``join`` that is never imported, which
    # raised NameError; ``os.path.join`` is the function intended here.
    for src in glob.iglob(os.path.join('update_data_sources', '*')):
        agency = os.path.splitext(os.path.basename(src))[0]
        with open(src, newline='') as f:
            sources[agency] = list(csv.DictReader(f))

    # Write out data
    today = datetime.date.today().isoformat()
    for agency, data_files in sources.items():
        for data_file in data_files:
            # Heading: the name underlined with '=' characters.
            print(data_file['name'] + '\n' + '=' * len(data_file['name']))

            # Create requested directory
            dest = os.path.join('data', today, agency,
                                os.path.normpath(data_file['dest']))
            os.makedirs(dest, mode=0o700, exist_ok=True)

            # Determine file path to save to
            url_path = unquote_plus(urlparse(data_file['url']).path)
            url_file = url_path.rpartition('/')[2]
            is_zip = os.path.splitext(url_file)[1].lower() == '.zip'
            # Archives are saved beside ``dest`` so that only their members
            # end up inside it.
            path = os.path.join(os.path.dirname(dest) if is_zip else dest,
                                url_file)
            download_web_res(data_file['url'], path)

            # If zip file, extract members
            if is_zip:
                with zipfile.ZipFile(path) as fzip:
                    fzip.extractall(dest)
                print('Extracted "{}" to "{}"'.format(url_file, dest))
            print()
# Run the update only when executed as a script, not when imported.
if '__main__' == __name__:
    go()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment