firesofmay · September 19, 2024 09:09
diff --git a/address_explode.py b/address_explode.py
 """
 CSV Address Normalizer

 This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns.

 Usage:
    python address_explode.py

 Requirements:
    - Python 3.6+
    - Install required packages:
        pip install usaddress-scourgify us

 Input:
    - CSV file with at least an 'address' column

 Output:
    - CSV file with additional columns: 'city', 'state_abbr', 'state_full'
 """

 import csv
 from scourgify import normalize_address_record
 import us

 def process_csv(input_file_path, output_file_path):
    """Copy a CSV file, appending city and state columns parsed from its address column.

    The first row is treated as the header. The address column is the first
    header equal to "address", ignoring case and surrounding whitespace.
    Every data row is written back with three extra fields: 'city',
    'state_abbr' and 'state_full'.

    Rows shorter than the header (including blank lines) are padded with
    empty strings so the address lookup cannot fail and the appended fields
    stay under their headers.

    Args:
        input_file_path: Path of the CSV file to read.
        output_file_path: Path of the CSV file to write. It is only opened
            once the input has a header containing an address column, so
            an unusable input does not truncate an existing output file.

    Returns:
        None. Progress and problems are reported with print().
    """
    with open(input_file_path, 'r', newline='') as infile:
        reader = csv.reader(infile)

        headers = next(reader, None)
        if headers is None:
            print("The input CSV file is empty.")
            return

        address_index = next(
            (index for index, header in enumerate(headers)
             if header.strip().lower() == "address"),
            None,
        )
        if address_index is None:
            print("Address column not found in the CSV file.")
            return

        # Open the output only after the input has been validated.
        with open(output_file_path, 'w', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(headers + ['city', 'state_abbr', 'state_full'])

            for row in reader:
                missing = len(headers) - len(row)
                if missing > 0:
                    # Short/blank row: pad instead of raising IndexError.
                    row = row + [''] * missing
                writer.writerow(row + _lookup_city_state(row[address_index]))

    print(f"Processing complete. Output saved to {output_file_path}")


 def _lookup_city_state(address):
    """Return [city, state_abbr, state_full] for one address string.

    A blank address yields three empty strings without calling the
    normalizer. Any error while normalizing is printed and also yields
    three empty strings, so one bad row never aborts the whole file.
    """
    if not address.strip():
        return ['', '', '']
    try:
        normalized = normalize_address_record(address)
        # `or ''` also covers keys that are present but hold None.
        city = normalized.get('city') or ''
        state_abbr = normalized.get('state') or ''
        # A failed lookup must not discard the city/state already parsed,
        # so a missing match is handled here rather than via AttributeError.
        state = us.states.lookup(state_abbr) if state_abbr else None
        state_full = state.name if state is not None else ''
    except Exception as e:
        # Deliberately broad: this is a per-row best-effort boundary.
        print(f"Error normalizing address: {address}. Error: {str(e)}")
        return ['', '', '']
    return [city, state_abbr, state_full]

 # Example usage: the source CSV is read from the first path and the
 # augmented copy is written to the second; replace both with your own paths.
 # NOTE(review): this runs whenever the module is executed or imported.
 input_file_path, output_file_path = "input.csv", "output.csv"
 process_csv(input_file_path, output_file_path)
	"""
	CSV Address Normalizer

	This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns.

	Usage:
	python address_explode.py

	Requirements:
	- Python 3.6+
	- Install required packages:
	pip install usaddress-scourgify us

	Input:
	- CSV file with at least an 'address' column

	Output:
	- CSV file with additional columns: 'city', 'state_abbr', 'state_full'
	"""

	import csv
	from scourgify import normalize_address_record
	import us

	def process_csv(input_file_path, output_file_path):
	    """Copy a CSV file, appending city and state columns parsed from its address column.

	    The first row is treated as the header. The address column is the
	    first header equal to "address", ignoring case and surrounding
	    whitespace. Every data row is written back with three extra fields:
	    'city', 'state_abbr' and 'state_full'.

	    Rows shorter than the header (including blank lines) are padded with
	    empty strings so the address lookup cannot fail and the appended
	    fields stay under their headers. A blank address, or one that cannot
	    be normalized, gets three empty fields; normalization errors are
	    printed and processing continues with the next row.

	    Args:
	        input_file_path: Path of the CSV file to read.
	        output_file_path: Path of the CSV file to write. It is only
	            opened once the input has a header containing an address
	            column, so an unusable input does not truncate an existing
	            output file.

	    Returns:
	        None. Progress and problems are reported with print().
	    """
	    with open(input_file_path, 'r', newline='') as infile:
	        reader = csv.reader(infile)

	        headers = next(reader, None)
	        if headers is None:
	            print("The input CSV file is empty.")
	            return

	        address_index = None
	        for index, header in enumerate(headers):
	            if header.strip().lower() == "address":
	                address_index = index
	                break

	        if address_index is None:
	            print("Address column not found in the CSV file.")
	            return

	        # Open the output only after the input has been validated.
	        with open(output_file_path, 'w', newline='') as outfile:
	            writer = csv.writer(outfile)
	            writer.writerow(headers + ['city', 'state_abbr', 'state_full'])

	            for row in reader:
	                if len(row) < len(headers):
	                    # Short/blank row: pad instead of raising IndexError.
	                    row = row + [''] * (len(headers) - len(row))
	                address = row[address_index]

	                city = state_abbr = state_full = ''
	                if address.strip():
	                    try:
	                        normalized = normalize_address_record(address)
	                        # `or ''` also covers keys present but set to None.
	                        city = normalized.get('city') or ''
	                        state_abbr = normalized.get('state') or ''
	                        # Handle a failed lookup explicitly so it does not
	                        # discard the city/state already parsed.
	                        state = us.states.lookup(state_abbr) if state_abbr else None
	                        state_full = state.name if state is not None else ''
	                    except Exception as e:
	                        # Deliberately broad: per-row best-effort boundary.
	                        print(f"Error normalizing address: {address}. Error: {str(e)}")
	                        city = state_abbr = state_full = ''

	                writer.writerow(row + [city, state_abbr, state_full])

	    print(f"Processing complete. Output saved to {output_file_path}")

	# Example usage: the source CSV is read from the first path and the
	# augmented copy is written to the second; replace both with your own paths.
	# NOTE(review): this runs whenever the module is executed or imported.
	input_file_path, output_file_path = "input.csv", "output.csv"
	process_csv(input_file_path, output_file_path)