-
-
Save rogerallen/1583593 to your computer and use it in GitHub Desktop.
# United States of America Python Dictionary to translate States,
# Districts & Territories to Two-Letter codes and vice versa.
#
# Canonical URL: https://gist.github.com/rogerallen/1583593
#
# Dedicated to the public domain.  To the extent possible under law,
# Roger Allen has waived all copyright and related or neighboring
# rights to this code.  Data originally from Wikipedia at the url:
# https://en.wikipedia.org/wiki/ISO_3166-2:US
#
# Automatically Generated 2024-10-08 07:45:06 via Jupyter Notebook from
# https://gist.github.com/rogerallen/d75440e8e5ea4762374dfd5c1ddf84e0
# Maps the full name of each state, district, or territory to its
# two-letter code.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "Virgin Islands, U.S.": "VI",
}

# Reverse lookup: two-letter code -> full name.  Every code above is
# unique, so no entry is lost when the mapping is inverted.
abbrev_to_us_state = {
    abbrev: name for name, abbrev in us_state_to_abbrev.items()
}
Thank YOU!!
What a nice service to other developers. Makes me think of building a package with lots of lookups needed for cartography, like a FIPS lookup for example. Now, I bet someone else already did that, too.
Thanks for this.
I am trying to do something similar but worldwide.
I have a list of locations that mixes states, cities, countries, counties and regions, some abbreviated and some written in full. For instance: NY, CA, England, UK, USA, Minnesota, London, Bradford, etc. I want every entry converted to its country, such as NY = USA, England = UK, Scotland = UK, Minnesota = USA, etc.
Is it possible to do this in python?
Thanks in advance.
Sweet! You just saved me a lot of time!
@kinghelix You are today's hero!
In case anyone needs these in the form of enums.
from enum import Enum
from functools import cache
class State(str, Enum):
    """US states, the District of Columbia, and territories by full name.

    Each member's value is the full display name.  Because the class also
    derives from ``str``, a member compares equal to that name (for
    example ``State.ALABAMA == "Alabama"``), and ``State("Alabama")``
    looks a member up by its full name.
    """

    ALABAMA = "Alabama"
    ALASKA = "Alaska"
    AMERICAN_SAMOA = "American Samoa"
    ARIZONA = "Arizona"
    ARKANSAS = "Arkansas"
    CALIFORNIA = "California"
    COLORADO = "Colorado"
    CONNECTICUT = "Connecticut"
    DELAWARE = "Delaware"
    DISTRICT_OF_COLUMBIA = "District of Columbia"
    FLORIDA = "Florida"
    GEORGIA = "Georgia"
    GUAM = "Guam"
    HAWAII = "Hawaii"
    IDAHO = "Idaho"
    ILLINOIS = "Illinois"
    INDIANA = "Indiana"
    IOWA = "Iowa"
    KANSAS = "Kansas"
    KENTUCKY = "Kentucky"
    LOUISIANA = "Louisiana"
    MAINE = "Maine"
    MARYLAND = "Maryland"
    MASSACHUSETTS = "Massachusetts"
    MICHIGAN = "Michigan"
    MINNESOTA = "Minnesota"
    MISSISSIPPI = "Mississippi"
    MISSOURI = "Missouri"
    MONTANA = "Montana"
    NEBRASKA = "Nebraska"
    NEVADA = "Nevada"
    NEW_HAMPSHIRE = "New Hampshire"
    NEW_JERSEY = "New Jersey"
    NEW_MEXICO = "New Mexico"
    NEW_YORK = "New York"
    NORTH_CAROLINA = "North Carolina"
    NORTH_DAKOTA = "North Dakota"
    NORTHERN_MARIANA_ISLANDS = "Northern Mariana Islands"
    OHIO = "Ohio"
    OKLAHOMA = "Oklahoma"
    OREGON = "Oregon"
    PENNSYLVANIA = "Pennsylvania"
    PUERTO_RICO = "Puerto Rico"
    RHODE_ISLAND = "Rhode Island"
    SOUTH_CAROLINA = "South Carolina"
    SOUTH_DAKOTA = "South Dakota"
    TENNESSEE = "Tennessee"
    TEXAS = "Texas"
    UTAH = "Utah"
    VERMONT = "Vermont"
    VIRGIN_ISLANDS = "Virgin Islands"
    VIRGINIA = "Virginia"
    WASHINGTON = "Washington"
    WEST_VIRGINIA = "West Virginia"
    WISCONSIN = "Wisconsin"
    WYOMING = "Wyoming"
@cache
def get_state_enum(state: str) -> State:
    """Return the ``State`` member for a two-letter code or a full name.

    Args:
        state: Either a two-letter code such as ``"NY"`` or a full name
            such as ``"New York"``.  The lookup is an exact,
            case-sensitive match.

    Returns:
        The matching ``State`` member.

    Raises:
        KeyError: If ``state`` is neither a known code nor a known name.
    """
    state_mappings = {
        "AL": State.ALABAMA,
        "AS": State.AMERICAN_SAMOA,
        "AK": State.ALASKA,
        "AZ": State.ARIZONA,
        "AR": State.ARKANSAS,
        "CA": State.CALIFORNIA,
        "CO": State.COLORADO,
        "CT": State.CONNECTICUT,
        "DC": State.DISTRICT_OF_COLUMBIA,
        "DE": State.DELAWARE,
        "FL": State.FLORIDA,
        "GA": State.GEORGIA,
        "GU": State.GUAM,
        "HI": State.HAWAII,
        "ID": State.IDAHO,
        "IL": State.ILLINOIS,
        "IN": State.INDIANA,
        "IA": State.IOWA,
        "KS": State.KANSAS,
        "KY": State.KENTUCKY,
        "LA": State.LOUISIANA,
        "ME": State.MAINE,
        "MD": State.MARYLAND,
        "MA": State.MASSACHUSETTS,
        "MI": State.MICHIGAN,
        "MN": State.MINNESOTA,
        "MP": State.NORTHERN_MARIANA_ISLANDS,
        "MS": State.MISSISSIPPI,
        "MO": State.MISSOURI,
        "MT": State.MONTANA,
        "NE": State.NEBRASKA,
        "NV": State.NEVADA,
        "NH": State.NEW_HAMPSHIRE,
        "NJ": State.NEW_JERSEY,
        "NM": State.NEW_MEXICO,
        "NY": State.NEW_YORK,
        "NC": State.NORTH_CAROLINA,
        "ND": State.NORTH_DAKOTA,
        "OH": State.OHIO,
        "OK": State.OKLAHOMA,
        "OR": State.OREGON,
        "PA": State.PENNSYLVANIA,
        "PR": State.PUERTO_RICO,
        "RI": State.RHODE_ISLAND,
        "SC": State.SOUTH_CAROLINA,
        "SD": State.SOUTH_DAKOTA,
        "TN": State.TENNESSEE,
        "TX": State.TEXAS,
        "UT": State.UTAH,
        "VT": State.VERMONT,
        "VA": State.VIRGINIA,
        "VI": State.VIRGIN_ISLANDS,
        "WA": State.WASHINGTON,
        "WV": State.WEST_VIRGINIA,
        "WI": State.WISCONSIN,
        "WY": State.WYOMING,
        # Also accept every full name, so callers may pass either form.
        **{member.value: member for member in State},
    }
    return state_mappings[state]
Note: updated to include more readable enums (full state names instead of abbreviations), and added a mapper function to return enums from both full and abbreviated names. If you're using Python <3.9, you can replace the @cache decorator with @lru_cache(maxsize=None), which is what @cache is shorthand for.
Bumped into this while working on a Coursera Data Science class and looking for something of this form for a quick script, just wanted to be sure to say thanks! :)
Thank you!
Awesome! Thanks for the time saver :)
Here it is reversed and including DC, Northern Mariana Islands, Palau, Puerto Rico, the Virgin Islands, and US military base abbreviations.
# Two-letter code -> full name.  Besides the fifty states this table
# carries the district/territory entries and the Armed Forces codes
# listed at the bottom.
us_state_abbrev = {
    # --- States ---
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    # --- District, territories, and Palau ---
    "DC": "District of Columbia",
    "MP": "Northern Mariana Islands",
    "PW": "Palau",
    "PR": "Puerto Rico",
    "VI": "Virgin Islands",
    # --- Armed Forces entries ---
    "AA": "Armed Forces Americas (Except Canada)",
    "AE": "Armed Forces Africa/Canada/Europe/Middle East",
    "AP": "Armed Forces Pacific",
}
Those east of the Mississippi River:
['AL', 'CT', 'DE', 'FL', 'GA', 'IL', 'IN', 'KY', 'ME', 'MD', 'MA', 'MI', 'MS', 'NH', 'NJ', 'NY', 'NC', 'OH', 'PA', 'RI', 'SC', 'TN', 'VT', 'VA', 'WV', 'WI']
Just what I need, cheers for creating this and sharing it!
Hereby an auto enumerator.
from enum import IntEnum, auto
class US_States(IntEnum):
    """Integer enumeration keyed by two-letter state/territory code.

    Values come from ``auto()``, so they are consecutive integers
    starting at 1 in declaration order (``AL`` is 1, ``WY`` is 56).  The
    trailing comment on each member gives the full name.
    """

    AL = auto()  # Alabama
    AK = auto()  # Alaska
    AS = auto()  # American Samoa
    AZ = auto()  # Arizona
    AR = auto()  # Arkansas
    CA = auto()  # California
    CO = auto()  # Colorado
    CT = auto()  # Connecticut
    DE = auto()  # Delaware
    DC = auto()  # District of Columbia
    FL = auto()  # Florida
    GA = auto()  # Georgia
    GU = auto()  # Guam
    HI = auto()  # Hawaii
    ID = auto()  # Idaho
    IL = auto()  # Illinois
    IN = auto()  # Indiana
    IA = auto()  # Iowa
    KS = auto()  # Kansas
    KY = auto()  # Kentucky
    LA = auto()  # Louisiana
    ME = auto()  # Maine
    MD = auto()  # Maryland
    MA = auto()  # Massachusetts
    MI = auto()  # Michigan
    MN = auto()  # Minnesota
    MS = auto()  # Mississippi
    MO = auto()  # Missouri
    MT = auto()  # Montana
    NE = auto()  # Nebraska
    NV = auto()  # Nevada
    NH = auto()  # New Hampshire
    NJ = auto()  # New Jersey
    NM = auto()  # New Mexico
    NY = auto()  # New York
    NC = auto()  # North Carolina
    ND = auto()  # North Dakota
    MP = auto()  # Northern Mariana Islands
    OH = auto()  # Ohio
    OK = auto()  # Oklahoma
    OR = auto()  # Oregon
    PA = auto()  # Pennsylvania
    PR = auto()  # Puerto Rico
    RI = auto()  # Rhode Island
    SC = auto()  # South Carolina
    SD = auto()  # South Dakota
    TN = auto()  # Tennessee
    TX = auto()  # Texas
    UT = auto()  # Utah
    VT = auto()  # Vermont
    VI = auto()  # Virgin Islands
    VA = auto()  # Virginia
    WA = auto()  # Washington
    WV = auto()  # West Virginia
    WI = auto()  # Wisconsin
    WY = auto()  # Wyoming
Here is one to encode US states based on their population ranking (might be useful for some further correlation analysis):
# Ordinal encoding by population rank, as described by the author: the
# codes are listed in rank order and receive the scores 52, 51, ..., 1,
# so the first code gets the highest score.
us_state_to_population_ranking_encoding = {
    code: score
    for score, code in zip(
        range(52, 0, -1),
        (
            "CA", "TX", "FL", "NY", "PA", "IL", "OH", "GA",
            "NC", "MI", "NJ", "VA", "WA", "AZ", "TN", "MA",
            "IN", "MO", "MD", "CO", "WI", "MN", "SC", "AL",
            "LA", "KY", "OR", "OK", "CT", "UT", "PR", "NV",
            "IA", "AR", "MS", "KS", "NM", "NE", "ID", "WV",
            "HI", "NH", "ME", "MT", "RI", "DE", "SD", "ND",
            "AK", "DC", "VT", "WY",
        ),
    )
}
Thank you!
Thank you!! And Happy New Year!
I just came in today to parse legal data from all US states and this came in very handy! Thanks buddy!
# Normalisation table: maps a full name OR a two-letter code to the
# two-letter code, so a column that mixes both forms can be
# standardised with a single lookup.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}
# Append the identity entries ("AL" -> "AL", ...) in the same order as
# the names above.  The comprehension is fully built before update()
# runs, so the dict is never modified while it is being iterated.
us_state_to_abbrev.update(
    {code: code for code in us_state_to_abbrev.values()}
)
needed this for standardizing the column where some values were full names and some were abbreviations
@pnojai take a look at : https://pypi.org/project/us/
thanks a lot
Thank you for sharing this information
@sandlerj It's worked very well, and thank you
Thank you!
Commenting as well, so everyone gets spam on this gist! haha
Everyone who contributed to this I hope nothing but the best for you! Thank you!
Updated with latest results from Wikipedia-scraping script. Only change was to Virgin Islands.
This's greeeeeat!!