Created
June 26, 2024 22:52
-
-
Save rjurney/65404b9fb0e3f5147d167bdf50001fbb to your computer and use it in GitHub Desktop.
Script to extract unique address, person-name and company-name pairs from the OpenSanctions pairs.json training data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Quickly extract all unique address, person and company name records from pairs.json: https://www.opensanctions.org/docs/pairs/
# Note: non-commercial use only, affordable licenses available at https://www.opensanctions.org/licensing/
#
# Outputs (one compact JSON object per line, sorted and de-duplicated):
#   data/addresses.json  - {left_addresses, right_addresses}
#   data/people.json     - {left_person, right_person}
#   data/companies.json  - {left_company, right_company}
#
# Requires: wget, jq, sort. Run from the directory that should contain data/.

# Stop at the first failing command, unset variable or failed pipeline stage,
# so a broken download or a jq error never produces truncated output files.
set -euo pipefail

# Neither `wget -O` nor the `>` redirections below create the directory.
mkdir -p data

# Get the data
wget https://data.opensanctions.org/contrib/training/pairs.json -O data/pairs.json

# Get all the unique address set pairs.
# Records with no address on either side are dropped inside jq (instead of
# string-matching the serialized JSON with grep).
jq -c '
  {left_addresses: .left.properties.address?, right_addresses: .right.properties.address?}
  | select(.left_addresses != null or .right_addresses != null)
' data/pairs.json | sort -u > data/addresses.json

# Get all the unique people name set pairs.
# A side whose schema is not "Person" contributes null; records where both
# sides are null are dropped. No trailing comma in the object literal, since
# not every jq release accepts one.
jq -c '
  {
    left_person: (if .left.schema == "Person" then .left.properties.name? else null end),
    right_person: (if .right.schema == "Person" then .right.properties.name? else null end)
  }
  | select(.left_person != null or .right_person != null)
' data/pairs.json | sort -u > data/people.json

# Get all the unique company name set pairs.
# NOTE(review): only entities whose schema is exactly "Organization" are kept;
# confirm whether other schemata (e.g. "Company") should be included as well.
jq -c '
  {
    left_company: (if .left.schema == "Organization" then .left.properties.name? else null end),
    right_company: (if .right.schema == "Organization" then .right.properties.name? else null end)
  }
  | select(.left_company != null or .right_company != null)
' data/pairs.json | sort -u > data/companies.json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment