Created
September 25, 2025 21:50
-
-
Save superMDguy/9170fec9c88a128383da4d7431c65fbd to your computer and use it in GitHub Desktop.
Create a location "hash" to uniquely identify an address, useful for de-duping messy address data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| UPDATE TABLE_NAME | |
| SET location_hash = CASE | |
| -- Set to NULL if address_1 or city or zip is null | |
| WHEN address_1 IS NULL OR city IS NULL OR zip IS NULL | |
| THEN NULL | |
| ELSE md5(CONCAT( | |
| -- Standardize the combined street address | |
| COALESCE( | |
| -- Hack to set to NULL if standardize_address returns row with all empty fields | |
| NULLIF( | |
| -- Only include address_2 if it is not null (otherwise standardize_address becomes unstable) | |
| standardize_address('us_lex', 'us_gaz', 'us_rules', address_1 || CASE WHEN address_2 IS NOT NULL THEN ' ' || address_2 ELSE '' END, '')::text, | |
| '(,,,,,,,,,,,,,,,)' | |
| ), | |
| -- Fallback to raw address if standardization fails | |
| UPPER((address_1 || CASE WHEN address_2 IS NOT NULL THEN ' ' || address_2 ELSE '' END)) | |
| ), '|', | |
| -- City in uppercase for consistency | |
| UPPER(city::text), '|', | |
| -- Only first 5 digits of ZIP | |
| LEFT(zip::text, 5) | |
| ))::text | |
| END | |
| WHERE location_hash IS NULL; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment