Last active
December 26, 2015 11:59
-
-
Save kpq/7147531 to your computer and use it in GitHub Desktop.
Scrape all U.S. adoptions, by country of birth and year, from the State Department.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Fetch yearly adoption counts for one country of birth.
#'
#' Builds the statistics URL on adoption.state.gov for `country_name`,
#' downloads the HTML table it returns, and tidies it into a data frame.
#'
#' @param country_name Country name as listed on the site, e.g. "Costa Rica".
#' @return A data frame with numeric columns `year` and `adoptions`, plus a
#'   character column `country` holding `country_name` exactly as passed in.
get_country_data <- function(country_name) {
  # the site's URLs use a plus in place of each space
  query_name <- gsub(" ", "+", country_name, fixed = TRUE)
  # percent-encode the backtick (this is what makes "Cote d`Ivoire" work)
  query_name <- gsub("`", "%60", query_name, fixed = TRUE)

  url <- paste0(
    "http://adoption.state.gov/maps/statistics/map_files/statistics.php",
    "?special=NONE&year=ALL&country=",
    query_name,
    "&state=NONE&returnType=TABLE"
  )

  # readHTMLTable is not defined in this file; presumably XML::readHTMLTable,
  # so the XML package must be attached before calling -- TODO confirm
  this_country <- data.frame(readHTMLTable(url))
  colnames(this_country) <- c("year", "adoptions")

  # go through character so factor columns yield their labels, not level codes
  this_country$year <- as.numeric(as.character(this_country$year))
  this_country$adoptions <- as.numeric(as.character(this_country$adoptions))

  # label rows with the readable name rather than the URL-encoded one; rep()
  # keeps the assignment valid even when the fetched table has no rows
  this_country$country <- rep(country_name, nrow(this_country))
  this_country
}
# a full list of countries, pasted from the site and formatted by hand
# you might prefer to load them from a csv...whichever works
# (spelling and capitalisation are kept as pasted; each name is sent to the
# site as the `country` query value by get_country_data)
countries <- c(
  "Afghanistan", "Albania", "Algeria", "Antigua and Barbuda", "Argentina",
  "Armenia", "Australia", "Austria", "Azerbaijan", "THE BAHAMAS",
  "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
  "Bermuda", "Bhutan", "Bolivia", "Bosnia-Herzegovina", "Botswana", "Brazil",
  "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada",
  "Cape Verde", "Central African Republic", "Chad", "Chile", "China",
  "Colombia", "Congo-Kinshasa", "Congo-Brazzaville", "Costa Rica",
  "Cote d`Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Djibouti",
  "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador",
  "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Fiji", "Finland",
  "France", "Gabon", "The Gambia", "Georgia", "Germany", "Ghana", "Greece",
  "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti",
  "Honduras", "Hong Kong", "Hungary", "Iceland", "India", "Indonesia", "Iran",
  "Iraq", "Ireland", "Israel", "Jamaica", "Japan", "Jordan",
  "Kyrgyz Republic", "Kenya", "Kiribati", "Kosovo", "Kazakhstan", "Laos",
  "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Lithuania",
  "Macedonia", "Madagascar", "Malawi", "Malaysia", "Mali",
  "MARSHALL ISLANDS, REPUBLIC OF THE", "Mauritius", "Mexico",
  "MICRONESIA, FEDERATED STATES OF", "Moldova", "Mongolia", "Montenegro",
  "Morocco", "Mozambique", "Myanmar", "Namibia", "Nepal", "Netherlands",
  "New Zealand", "Nicaragua", "Niger", "Nigeria", "Norway", "Oman",
  "Pakistan", "Palestinian Authority", "Panama", "Papua New Guinea",
  "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Romania",
  "Russia", "Rwanda", "Samoa", "Saudi Arabia", "Senegal", "Serbia",
  "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Somalia",
  "South Africa", "South Korea", "Spain", "Sri Lanka", "St. Kitts and Nevis",
  "St. Lucia", "St. Vincent and the Grenadines", "Sudan", "Suriname",
  "Swaziland", "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania",
  "Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Kingdom",
  "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Yemen",
  "Zambia", "Zimbabwe"
)
# run get_country_data() for every country and stack the results into one
# data frame called 'all' (note: this name masks base::all in this script)
country_tables <- lapply(countries, function(i) {
  # progress indicator: one printed line per country as it is fetched
  print(i)
  get_country_data(i)
})
# bind once instead of growing the data frame inside a loop; rev() keeps the
# row order of the earlier loop, which put each new country on top
all <- do.call(rbind, rev(country_tables))
# done! write it to a csv if you like
write.csv(all, file = "all_adoptions.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment