Skip to content

Instantly share code, notes, and snippets.

@USMortality
Created November 28, 2024 19:33
Show Gist options
  • Save USMortality/eb17e6eb518473274ac3ee9b3ecc5caf to your computer and use it in GitHub Desktop.
Save USMortality/eb17e6eb518473274ac3ee9b3ecc5caf to your computer and use it in GitHub Desktop.
USA UCD/MCD Deaths 2017-2022
#!/bin/sh
# Download the CDC/NCHS public-use mortality archives for every year in YEARS
# and flatten them into a single CSV with one row per (record, listed
# condition) pair.
#
# Output:   $OUTPUT_CSV with the columns id,year,month,age,ucd,mcd
# Scratch:  $OUTPUT_DIR (downloads + extracted files; removed on success)
# Requires: wget, unzip, find, head, awk
set -eu

BASE_URL="https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/DVS/mortality"
OUTPUT_DIR="mortality_data"
YEARS="2017 2018 2019 2020 2021 2022"
OUTPUT_CSV="mcd.csv"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

mkdir -p "$OUTPUT_DIR"
printf '%s\n' "id,year,month,age,ucd,mcd" > "$OUTPUT_CSV"

# YEARS is deliberately unquoted: it is a space-separated list of years.
for YEAR in $YEARS; do
  # Last two characters of the year. POSIX-only expansion; the previous
  # "${YEAR: -2}" form is a bashism and fails under a strict /bin/sh.
  YY="${YEAR#"${YEAR%??}"}"
  ZIP_FILE="mort${YEAR}us.zip"

  wget -q --show-progress -O "$OUTPUT_DIR/$ZIP_FILE" "$BASE_URL/$ZIP_FILE" \
    || die "download failed: $BASE_URL/$ZIP_FILE"

  # unzip exits with 1 when it only emitted warnings but still extracted
  # everything, so only statuses above 1 are treated as fatal.
  unzip -o "$OUTPUT_DIR/$ZIP_FILE" -d "$OUTPUT_DIR" \
    || [ "$?" -eq 1 ] \
    || die "extraction failed: $OUTPUT_DIR/$ZIP_FILE"

  # The extracted file name differs between years, so try both known
  # patterns. The parentheses make "-type f" apply to both alternatives, and
  # head guarantees awk receives exactly one path.
  DATA_FILE=$(find "$OUTPUT_DIR" -type f \
    \( -iname "Mort20${YY}US.PubUse.txt" -o -iname "VS${YY}*MORT.DUSMCPUB*" \) \
    | head -n 1)
  [ -n "$DATA_FILE" ] || die "no data file for $YEAR found in $OUTPUT_DIR"

  # Column meanings below follow the fixed-width NCHS record layout as this
  # script uses it -- TODO confirm against the layout documentation for each
  # year before relying on them.
  awk 'BEGIN { OFS = "," }
  # NOTE(review): NR > 1 skips the first line of every file. If these files
  # have no header line this drops one record per year -- confirm.
  NR > 1 {
    # Column 20 == 4 is skipped (presumably resident status "foreign").
    if (substr($0, 20, 1) == 4) next;
    year = substr($0, 102, 4);
    # id = data year followed by the zero-padded line number in the file.
    id = sprintf("%d%07d", year, FNR);
    month = substr($0, 65, 2);
    # Column 70 is the age unit flag; only unit 1 (presumably years) keeps
    # the 3-digit age, every other unit is reported as 0.
    age = (substr($0, 70, 1) == 1 ? substr($0, 71, 3) : 0);
    ucd = substr($0, 146, 4);
    # Number of 7-character condition slots that start at column 165.
    mcds = int(substr($0, 163, 2));
    for (i = 0; i < mcds; i++) {
      # Skip the first 2 characters of each slot and take the 4-char code.
      print id, year, month, age, ucd, substr($0, 165 + i * 7 + 2, 4);
    }
  }' "$DATA_FILE" >> "$OUTPUT_CSV" \
    || die "failed to convert $DATA_FILE"
done

# -r is required to remove a directory; ":?" guards against an empty path.
rm -rf -- "${OUTPUT_DIR:?}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment