Skip to content

Instantly share code, notes, and snippets.

@pollend
Last active January 27, 2018 19:01
Show Gist options
  • Select an option

  • Save pollend/e01da86ba6483a2a5b5437538e75baec to your computer and use it in GitHub Desktop.

Select an option

Save pollend/e01da86ba6483a2a5b5437538e75baec to your computer and use it in GitHub Desktop.
Collects funding history from the NSF and transforms it into CSV.
# Download every NSF award archive linked from the award-search download page
# into a fresh ./data directory.
rm -rf ./data
# Stop here if the directory cannot be created or entered; otherwise wget
# would drop the archives into whatever directory we happen to be in.
mkdir data && cd data || exit 1
# -f: fail on HTTP errors, -sS: no progress meter but still report errors,
# -L: follow redirects. Each match is one relative download link.
curl -fsSL https://www.nsf.gov/awardsearch/download.jsp |
  grep -oP 'download\?DownloadFileName=[0-9]+\&All=true' |
  while IFS= read -r key; do
    # Quote the URL: it contains '?' and '&', which must not be glob-expanded
    # or split by the shell.
    wget --content-disposition "https://www.nsf.gov/awardsearch/$key"
  done
import os
import xml.etree.ElementTree
import csv
# Column order of final.csv (one row per award).
AWARD_FIELDS = [
    "award_id",
    "amount",
    "award_title",
    "award_effective_date",
    "award_expiration_date",
    "abstract",
    "AwardInstrument",
]

# Column order of investigator.csv (one row per investigator per award).
INVESTIGATOR_FIELDS = [
    "award_id",
    "start_date",
    "first_name",
    "last_name",
    "email_address",
    "role_code",
]


def extract_award(root):
    """Build the final.csv row for one parsed award document.

    Args:
        root: The parsed document (an ElementTree, or its root element)
            whose root has an ``Award`` child.

    Returns:
        A dict keyed by ``AWARD_FIELDS``. Optional elements that are absent
        yield ``None`` (written as an empty CSV cell); elements that are
        present but empty yield ``""``.

    Raises:
        ValueError: If the document has no ``Award/AwardID`` element.
    """
    award_id = root.findtext("Award/AwardID")
    if award_id is None:
        # Without an ID the row cannot be tied to its investigators.
        raise ValueError("missing Award/AwardID")
    return {
        "award_id": award_id,
        "award_title": root.findtext("Award/AwardTitle"),
        "award_effective_date": root.findtext("Award/AwardEffectiveDate"),
        "award_expiration_date": root.findtext("Award/AwardExpirationDate"),
        "abstract": root.findtext("Award/AbstractNarration"),
        "amount": root.findtext("Award/AwardAmount"),
        # An empty <Value/> has text None, which str.join would reject.
        "AwardInstrument": ";".join(
            value.text or ""
            for value in root.findall("Award/AwardInstrument/Value")
        ),
    }


def extract_investigators(root, award_id):
    """Build the investigator.csv rows for one parsed award document.

    Args:
        root: The parsed document (an ElementTree, or its root element).
        award_id: The award ID to stamp on every returned row.

    Returns:
        A list with one dict per ``Award/Investigator`` element, keyed by
        ``INVESTIGATOR_FIELDS``. Absent child elements yield ``None``.
    """
    return [
        {
            "award_id": award_id,
            "first_name": node.findtext("FirstName"),
            "last_name": node.findtext("LastName"),
            "start_date": node.findtext("StartDate"),
            "role_code": node.findtext("RoleCode"),
            "email_address": node.findtext("EmailAddress"),
        }
        for node in root.findall("Award/Investigator")
    ]


def main(source_dir="./process", award_csv="final.csv",
         investigator_csv="investigator.csv"):
    """Convert every award XML file under *source_dir* into two CSV files.

    *source_dir* is expected to contain one sub-folder per batch, each
    holding XML files. Each folder name is printed as it is processed. A
    file that cannot be parsed or converted is reported (its path, then the
    error) and skipped; the run continues with the next file.

    Args:
        source_dir: Directory whose sub-folders contain the XML files.
        award_csv: Output path for the per-award rows.
        investigator_csv: Output path for the per-investigator rows.
    """
    # newline="" is required by the csv module so that embedded newlines and
    # line endings are written correctly; UTF-8 keeps non-ASCII text in
    # titles and abstracts from failing on platforms with another default.
    with open(award_csv, "w", newline="", encoding="utf-8") as award_file, \
            open(investigator_csv, "w", newline="",
                 encoding="utf-8") as investigator_file:
        award_writer = csv.DictWriter(award_file, fieldnames=AWARD_FIELDS)
        award_writer.writeheader()
        investigator_writer = csv.DictWriter(
            investigator_file, fieldnames=INVESTIGATOR_FIELDS)
        investigator_writer.writeheader()

        # Sorted so the output order does not depend on the filesystem.
        for folder in sorted(os.listdir(source_dir)):
            folder_path = os.path.join(source_dir, folder)
            if not os.path.isdir(folder_path):
                # A stray file next to the batch folders is not a batch.
                continue
            print(folder)
            for name in sorted(os.listdir(folder_path)):
                path = os.path.join(folder_path, name)
                if not os.path.isfile(path):
                    continue
                try:
                    root = xml.etree.ElementTree.parse(path)
                    award = extract_award(root)
                    investigators = extract_investigators(
                        root, award["award_id"])
                except Exception as e:
                    # Best-effort batch job: report the bad file, carry on.
                    print(path)
                    print(e)
                else:
                    # Write only after the whole file converted cleanly, so
                    # a bad file never leaves a partial set of rows behind.
                    award_writer.writerow(award)
                    investigator_writer.writerows(investigators)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment