larryv · May 26, 2011 06:42
diff --git a/update_data.py b/update_data.py
 #!/usr/bin/env python3.2

 import csv
 import datetime
 import glob
 import io
 import os
 import os.path
 import sys
 import tempfile
 import time
 import threading
 from urllib.parse import unquote_plus, urlparse
 import urllib.request
 import zipfile


 class DownloadWebRes(threading.Thread):
    """Thread that reads a remote resource to EOF, then writes it locally.

    ``remote`` must provide ``read(size)`` returning bytes (falsy at EOF)
    and ``local`` must provide ``write(data)``. Neither object is closed
    by this class. While the thread runs, ``progress`` reports how many
    bytes have been received; once it finishes, ``written`` holds whatever
    ``local.write`` returned.
    """

    def __init__(self, remote, local):
        super().__init__()
        self.remote = remote
        self.local = local
        # A bytearray grows in place; concatenating immutable bytes objects
        # would re-copy the whole payload for every chunk received.
        self.data = bytearray()
        self.written = 0

    @property
    def progress(self):
        """Number of bytes read from ``remote`` so far."""
        return len(self.data)

    def run(self):
        """Read ``remote`` in buffer-sized chunks, then write it all once."""
        while True:
            chunk = self.remote.read(io.DEFAULT_BUFFER_SIZE)
            if not chunk:
                break
            self.data += chunk
        self.written = self.local.write(self.data)


 def download_web_res(url, dest):
    """Download ``url`` into the file ``dest``, printing a ten-dot progress bar.

    ``dest`` is opened in ``'w+b'`` mode, so an existing file is truncated.
    The transfer runs in a ``DownloadWebRes`` thread while this function
    polls it and prints one dot per completed tenth of the announced size.
    If the response has no usable ``Content-length`` header (or announces
    zero bytes), no incremental progress is shown and all ten dots are
    printed once the transfer ends.

    The remote handle is closed even if opening ``dest`` or the transfer
    set-up raises.
    """

    # Set up remote location
    fremote = urllib.request.urlopen(url)
    try:
        # Content-length is optional, so a missing or malformed value must
        # not abort the download.
        try:
            size = int(fremote.getheader('Content-length'))
        except (TypeError, ValueError):
            size = None
        print('Downloading {} bytes from "{}"'.format(
            'unknown' if size is None else size, url))

        # Download!
        with open(dest, 'w+b') as flocal:
            decile = 0
            dl_thread = DownloadWebRes(fremote, flocal)
            dl_thread.start()
            while dl_thread.is_alive():
                if size:
                    # Tenths completed so far minus tenths already printed;
                    # capped so an understated size never prints > 10 dots.
                    incr = 10 * dl_thread.progress // size - decile
                    incr = min(incr, 10 - decile)
                    print('.' * incr, end='')
                    sys.stdout.flush()
                    decile += incr
                # Wait up to 0.1s, returning early once the thread is done.
                dl_thread.join(0.1)
            print('.' * (10 - decile))
    finally:
        fremote.close()

    # Wrap up
    print('Wrote {} bytes to "{}"'.format(dl_thread.written, dest))


 def go():
    """Download every data file listed in ``update_data_sources/*``.

    Each file in ``update_data_sources`` is read as CSV with a header row;
    its base name (without extension) is used as the agency name. Every row
    must provide ``name``, ``url`` and ``dest`` columns. Files are saved
    under ``data/<today>/<agency>/<dest>``; a ``.zip`` download is saved
    next to that directory and then extracted into it.
    """

    # Read in data
    sources = {}
    for src in glob.iglob(os.path.join('update_data_sources', '*')):
        agency = os.path.splitext(os.path.basename(src))[0]
        with open(src, newline='') as f:
            sources[agency] = list(csv.DictReader(f))

    # Write out data
    today = datetime.date.today().isoformat()
    for agency, data_files in sources.items():
        for data_file in data_files:

            print(data_file['name'] + '\n' + '=' * len(data_file['name']))

            # Create requested directory
            dest = os.path.join('data', today, agency,
                                os.path.normpath(data_file['dest']))
            os.makedirs(dest, mode=0o700, exist_ok=True)

            # Determine file path to save to: the last component of the
            # URL path. Archives go beside ``dest`` so that ``dest`` holds
            # only the extracted members.
            url_path = unquote_plus(urlparse(data_file['url']).path)
            url_file = url_path.rpartition('/')[2]
            is_zip = os.path.splitext(url_file)[1].lower() == '.zip'
            path = os.path.join(os.path.dirname(dest) if is_zip else dest,
                                url_file)
            download_web_res(data_file['url'], path)

            # If zip file, extract members
            # NOTE(review): members of a downloaded archive are extracted
            # without inspection; only list trusted sources.
            if is_zip:
                with zipfile.ZipFile(path) as fzip:
                    fzip.extractall(dest)
                print('Extracted "{}" to "{}"'.format(url_file, dest))

            print()

 # Script entry point: run the update only when executed directly,
 # not when this module is imported.
 if __name__ == '__main__':
    go()
	#!/usr/bin/env python3.2

	import csv
	import datetime
	import glob
	import io
	import os
	import os.path
	import sys
	import tempfile
	import time
	import threading
	from urllib.parse import unquote_plus, urlparse
	import urllib.request
	import zipfile


	class DownloadWebRes(threading.Thread):
	    """Thread that reads a remote resource to EOF, then writes it locally.

	    ``remote`` must provide ``read(size)`` returning bytes (falsy at EOF)
	    and ``local`` must provide ``write(data)``. Neither object is closed
	    by this class. While the thread runs, ``progress`` reports how many
	    bytes have been received; once it finishes, ``written`` holds whatever
	    ``local.write`` returned.
	    """

	    def __init__(self, remote, local):
	        super().__init__()
	        self.remote = remote
	        self.local = local
	        # A bytearray grows in place; concatenating immutable bytes
	        # objects would re-copy the whole payload for every chunk.
	        self.data = bytearray()
	        self.written = 0

	    @property
	    def progress(self):
	        """Number of bytes read from ``remote`` so far."""
	        return len(self.data)

	    def run(self):
	        """Read ``remote`` in buffer-sized chunks, then write it all once."""
	        while True:
	            chunk = self.remote.read(io.DEFAULT_BUFFER_SIZE)
	            if not chunk:
	                break
	            self.data += chunk
	        self.written = self.local.write(self.data)


	def download_web_res(url, dest):
	    """Download ``url`` into the file ``dest``, printing a ten-dot progress bar.

	    ``dest`` is opened in ``'w+b'`` mode, so an existing file is truncated.
	    The transfer runs in a ``DownloadWebRes`` thread while this function
	    polls it and prints one dot per completed tenth of the announced size.
	    If the response has no usable ``Content-length`` header (or announces
	    zero bytes), no incremental progress is shown and all ten dots are
	    printed once the transfer ends.

	    The remote handle is closed even if opening ``dest`` or the transfer
	    set-up raises.
	    """

	    # Set up remote location
	    fremote = urllib.request.urlopen(url)
	    try:
	        # Content-length is optional, so a missing or malformed value
	        # must not abort the download.
	        try:
	            size = int(fremote.getheader('Content-length'))
	        except (TypeError, ValueError):
	            size = None
	        print('Downloading {} bytes from "{}"'.format(
	            'unknown' if size is None else size, url))

	        # Download!
	        with open(dest, 'w+b') as flocal:
	            decile = 0
	            dl_thread = DownloadWebRes(fremote, flocal)
	            dl_thread.start()
	            while dl_thread.is_alive():
	                if size:
	                    # Tenths completed minus tenths already printed;
	                    # capped so an understated size never prints > 10.
	                    incr = 10 * dl_thread.progress // size - decile
	                    incr = min(incr, 10 - decile)
	                    print('.' * incr, end='')
	                    sys.stdout.flush()
	                    decile += incr
	                # Wait up to 0.1s, returning early once the thread ends.
	                dl_thread.join(0.1)
	            print('.' * (10 - decile))
	    finally:
	        fremote.close()

	    # Wrap up
	    print('Wrote {} bytes to "{}"'.format(dl_thread.written, dest))


	def go():
	    """Download every data file listed in ``update_data_sources/*``.

	    Each file in ``update_data_sources`` is read as CSV with a header row;
	    its base name (without extension) is used as the agency name. Every
	    row must provide ``name``, ``url`` and ``dest`` columns. Files are
	    saved under ``data/<today>/<agency>/<dest>``; a ``.zip`` download is
	    saved next to that directory and then extracted into it.
	    """

	    # Read in data
	    sources = {}
	    for src in glob.iglob(os.path.join('update_data_sources', '*')):
	        agency = os.path.splitext(os.path.basename(src))[0]
	        with open(src, newline='') as f:
	            sources[agency] = list(csv.DictReader(f))

	    # Write out data
	    today = datetime.date.today().isoformat()
	    for agency, data_files in sources.items():
	        for data_file in data_files:

	            print(data_file['name'] + '\n' + '=' * len(data_file['name']))

	            # Create requested directory
	            dest = os.path.join('data', today, agency,
	                                os.path.normpath(data_file['dest']))
	            os.makedirs(dest, mode=0o700, exist_ok=True)

	            # Determine file path to save to: the last component of the
	            # URL path. Archives go beside ``dest`` so that ``dest``
	            # holds only the extracted members.
	            url_path = unquote_plus(urlparse(data_file['url']).path)
	            url_file = url_path.rpartition('/')[2]
	            is_zip = os.path.splitext(url_file)[1].lower() == '.zip'
	            path = os.path.join(os.path.dirname(dest) if is_zip else dest,
	                                url_file)
	            download_web_res(data_file['url'], path)

	            # If zip file, extract members
	            # NOTE(review): members of a downloaded archive are extracted
	            # without inspection; only list trusted sources.
	            if is_zip:
	                with zipfile.ZipFile(path) as fzip:
	                    fzip.extractall(dest)
	                print('Extracted "{}" to "{}"'.format(url_file, dest))

	            print()

	# Script entry point: run the update only when executed directly,
	# not when this module is imported.
	if __name__ == '__main__':
	    go()