Last active
September 19, 2024 09:09
-
-
Save firesofmay/0a96c847b01bd70ba04bbe829e27cc8f to your computer and use it in GitHub Desktop.
This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
CSV Address Normalizer | |
This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns. | |
Usage: | |
python main.py | |
Requirements: | |
- Python 3.6+ | |
- Install required packages: | |
pip install usaddress-scourgify us | |
Input: | |
- CSV file with at least an 'address' column | |
Output: | |
- CSV file with additional columns: 'city', 'state_abbr', 'state_full' | |
""" | |
import csv | |
from scourgify import normalize_address_record | |
import us | |
def _parse_city_state(address):
    """Return ``(city, state_abbr, state_full)`` parsed from *address*.

    Any part that cannot be determined is returned as an empty string.
    Failures are reported on stdout and never propagate, so one bad address
    cannot abort the processing of the rest of the file.
    """
    city = state_abbr = state_full = ''
    # A blank address cannot be normalized; skip the call and its error.
    if not address.strip():
        return city, state_abbr, state_full
    try:
        normalized = normalize_address_record(address)
        # `or ''` also covers keys that are present but hold None.
        city = normalized.get('city') or ''
        state_abbr = normalized.get('state') or ''
        if state_abbr:
            # lookup() returns None for an unrecognized state; keep the
            # parsed city/abbreviation and only leave the full name blank.
            state = us.states.lookup(state_abbr)
            if state is not None:
                state_full = state.name
    except Exception as e:  # per-row best effort: report and carry on
        print(f"Error normalizing address: {address}. Error: {str(e)}")
    return city, state_abbr, state_full


def process_csv(input_file_path, output_file_path, encoding=None):
    """Copy a CSV file, appending 'city', 'state_abbr' and 'state_full' columns.

    The address column is the first header equal to "address", compared
    case-insensitively and ignoring surrounding whitespace and a leading
    byte-order mark. Rows shorter than the header are padded with empty
    strings so the appended columns stay aligned.

    Args:
        input_file_path: Path of the CSV file to read.
        output_file_path: Path of the CSV file to write. It is only opened
            once the input has been validated, so an existing file is not
            truncated when the input is empty or has no address column.
        encoding: Text encoding used for both files. ``None`` (the default)
            keeps the platform default encoding.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    with open(input_file_path, 'r', newline='', encoding=encoding) as infile:
        reader = csv.reader(infile)
        headers = next(reader, None)
        if headers is None:
            print("The input CSV file is empty.")
            return

        address_index = next(
            (
                index
                for index, header in enumerate(headers)
                if header.lstrip('\ufeff').strip().lower() == "address"
            ),
            None,
        )
        if address_index is None:
            print("Address column not found in the CSV file.")
            return

        width = len(headers)
        with open(output_file_path, 'w', newline='', encoding=encoding) as outfile:
            writer = csv.writer(outfile)
            writer.writerow(headers + ['city', 'state_abbr', 'state_full'])
            for row in reader:
                # Blank lines and truncated rows would otherwise raise
                # IndexError when the address cell is read.
                if len(row) < width:
                    row = row + [''] * (width - len(row))
                city, state_abbr, state_full = _parse_city_state(row[address_index])
                writer.writerow(row + [city, state_abbr, state_full])

    print(f"Processing complete. Output saved to {output_file_path}")
# Example usage: edit the two paths below, then run this file as a script.
input_file_path = "input.csv"  # Replace with your input CSV file path
output_file_path = "output.csv"  # Replace with your desired output file path

# Only process files when executed directly; importing this module (e.g. to
# reuse process_csv) must not read or write anything.
if __name__ == "__main__":
    process_csv(input_file_path, output_file_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment