Created
January 28, 2019 17:00
-
-
Save craigderington/aa85e5c3abd95bef8f5015c3e2c69c39 to your computer and use it in GitHub Desktop.
OpenAddress Data from Local Filesystem Path - Query into MySQL Database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!.env/bin/python | |
# -*- coding:utf-8 -*- | |
import config | |
import logging | |
import os | |
import csv | |
import sys | |
import MySQLdb | |
class DatabaseConnection(object): | |
""" | |
Create a new MySQL database connection | |
:param host, user, password, database | |
:type MySQL connection object | |
:return MySQL database query objects | |
""" | |
dbconfig = (config.db_host, config.db_user, config.db_pass, config.db) | |
def __init__(self): | |
self.conn = MySQLdb.connect(*self.dbconfig) | |
self.cur = self.conn.cursor() | |
def __enter__(self): | |
return DatabaseConnection() | |
def query(self, sql, params): | |
self.cur.execute(sql, params) | |
self.conn.commit() | |
def __exit__(self, exc_type, exc_val, exc_tb): | |
if self.conn: | |
self.conn.close() | |
def get_path(): | |
""" | |
Get working directory path and list of files | |
:return: list | |
""" | |
path = os.getcwd() + '/data/us_south/us/' | |
return path | |
def get_file_list(path): | |
""" | |
Get a list of CSV files from each directory | |
:return: list of lists | |
""" | |
csv_files = [os.path.join(d, x) for d, dirs, files in os.walk(path) for x in files if x.endswith(".csv")] | |
return csv_files | |
def split_paths(path): | |
""" | |
Split the path string neatly into usable parts | |
:param path: | |
:return: country, state | |
""" | |
return os.path.split(path) | |
def get_state(path): | |
""" | |
Return the city and state | |
:return: | |
""" | |
values = path.split('/') | |
country = values[6] | |
state = values[7] | |
return country, state | |
def main(): | |
""" | |
Main entry point | |
:return: none | |
""" | |
# create our log | |
log_format = '%(asctime)s %(levelname)s:%(message)s' | |
logging.basicConfig( | |
format=log_format, | |
filename=__name__ + '.addresses' | |
) | |
# set a counter | |
counter = 0 | |
sql = '''insert into address (lon, lat, number, street, unit, city, district, \ | |
region, postcode, unique_id, hash) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''' | |
# determine paths | |
try: | |
path = get_path() | |
if path: | |
try: | |
file_list = get_file_list(path) | |
if file_list: | |
try: | |
for item in file_list: | |
paths = split_paths(item) | |
data = get_state(paths[0]) | |
filename = paths[1] | |
country = data[0] | |
state = data[1] | |
try: | |
with open(path + '/' + state + '/' + filename, 'r') as f1: | |
reader = csv.reader(f1, delimiter=',') | |
for row in reader: | |
print(country.upper(), state.upper(), row) | |
params = (row[0], row[1], row[2] or 0, row[3], row[4], row[5], row[6], row[7], | |
row[8], row[9], row[10]) | |
try: | |
with DatabaseConnection() as db: | |
db.query(sql, params) | |
logging.info('Added record: {} {}'.format(str(row[4]), str(row[5]))) | |
except MySQLdb.Error as db_err: | |
logging.critical('A database exception has ' | |
'occurred: {}'.format(str(db_err))) | |
counter += 1 | |
except Exception as e: | |
logging.critical('Can not open file paths: {}'.format(str(e))) | |
except Exception as e: | |
logging.critical('There are zero CSV files to parse: {}. Aborting'.format(str(e))) | |
except IOError as io_err: | |
logging.critical('Unable to access the local filesystem: {}'.format(str(io_err))) | |
except IOError as io_err: | |
logging.critical('Unable to access the local filesystem: {}'.format(str(io_err))) | |
# return the counter total records | |
return counter | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment