Last active
April 10, 2018 18:39
-
-
Save azadag/96b7d6ed371c241d755114d5b5e43a18 to your computer and use it in GitHub Desktop.
basic ipums summarisation examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
install.packages('data.table', 'dplyr', 'plyr') | |
library(dplyr) | |
data_acs <- data.table::fread("./data/usa_00035.csv") | |
data_acs <- tbl_df(data_acs) | |
## make an example age dummy variable | |
data_acs$AGE_VAR <- ifelse(data_acs$AGE < 15, 1, 0) | |
data_acs$AGE_VAR <- ifelse(data_acs$AGE >= 24 & data_acs$AGE <= 36 , 2, data_acs$AGE_VAR) | |
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 36 & data_acs$AGE <= 51 , 3, data_acs$AGE_VAR) | |
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 51 & data_acs$AGE < 70, 4, data_acs$AGE_VAR) | |
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 71 , 5, data_acs$AGE_VAR) | |
#### create income groups determination ##### | |
levels <- c(-Inf, 50000, 74999, 99999, 124999, 149999, 199999, 299999, 399999, 599999, Inf) | |
labels <- c("Less50k", "b50_75", "b75_100", "b100_125", "b125_149", "b150_200", "b200_300", | |
"b300_400", "b400_600", "More600") | |
data_acs$HHINCOME <- with( data_acs, ifelse( HHINCOME == 9999999, 0, HHINCOME)) | |
## make an education dummy ## | |
data_acs <- data_acs %>% mutate(EDUC1 = as.factor(ifelse(EDUCD <= 61, "Less than HS", | |
ifelse(EDUCD == 62 | EDUCD == 63 | EDUCD == 64, "HS / equiv", | |
ifelse(EDUCD == 65 | EDUCD == 70 | EDUCD == 71 | EDUCD == 80| EDUCD == 90 | EDUCD == 100 , "Some College", | |
ifelse(EDUCD == 82 | EDUCD == 83, "AD", | |
ifelse(EDUCD == 101, "College Degree", | |
ifelse( EDUCD == 114 | EDUCD == 115, "Masters / Professional", | |
ifelse( EDUCD == 116, "Doctoral", | |
"NA"))))))))) | |
data_acs18$MARST <- as.factor(data_acs18$MARST) | |
data_acs18 <- data_acs18 %>% mutate(married = plyr::mapvalues(MARST, from = c("1","2","3","4","5","6"), to = c("1","1","3","2","3","2"))) | |
# only keep 18+ | |
data_acs18 <- data_acs %>% filter(AGE >= 18) | |
## example summarisations | |
## of owners what percentage have different education levels | |
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% | |
group_by(EDUC1) %>% | |
summarize( num = sum(PERWT) ) %>% | |
mutate(PERpct = num / sum(num)) | |
counts | |
## Of owners what is the ownership rate by education and age groups | |
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(EDUC1) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_educ_nums.csv") | |
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(AGE_VAR3, EDUC1) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_educ_agevar_nums.csv") | |
## marriage renters 2014 | |
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(married) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_married_nums.csv") | |
counts <- data_acs18 %>% filter(OWNERSHP== "2", YEAR == 2014) %>% group_by(AGE_VAR3, married) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_age_var_married_nums.csv") | |
#### | |
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014) %>% group_by(OWNERSHP, AGE_VAR3, SEX, RACEB, hhincsum) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_rentersnums.csv") | |
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0, RACEB != "NA", RACEB != "2X" ) %>% | |
group_by(OWNERSHP, AGE_VAR3, RACEB, SEX) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_rentersnums_2.csv") | |
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0) %>% | |
group_by(OWNERSHP, AGE_VAR3, SEX, hhincsum) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_rentersnumsinc_0.csv") | |
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0) %>% | |
group_by(OWNERSHP, AGE_VAR3, SEX, RACEB) %>% summarize( num = sum(PERWT) ) #%>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_rentersnumsrace_0.csv") | |
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR4)) %>% group_by(OWNERSHP, AGE_VAR4, SEX, RACEB) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num)) | |
counts | |
write.csv(counts, "./csv_out/CA_rentersnumsrace_0.csv") | |
chartHH <- data_acs_hhinc1 %>% filter(YEAR == "2014", YEAR == 2014) %>% group_by(HHTYPE, RACEB, SEX) %>% summarize(HH = sum(HHWT)) %>% | |
ungroup() %>% group_by(HHTYPE) %>% mutate(HHpct = HH / sum(HH)) | |
chartHH0 <- data_acs_hhinc1 %>% group_by(YEAR, OWNERSHP) %>% summarize(HH = sum(HHWT)) %>% | |
ungroup() %>% group_by(YEAR) %>% mutate(HHpct = HH / sum(HH)) | |
chartHH0$OWNERSHP <- as.factor(chartHH0$OWNERSHP) | |
chartHHyearOwnRA2015 <- data_acs_hhinc1 %>% filter(YEAR == "2014") %>% group_by( OWNERSHP, RACEB, AGE_VAR3, SEX) %>% summarize(HH = sum(HHWT)) %>% | |
group_by( RACEB, AGE_VAR3) %>% mutate(HHpct = HH / sum(HH)) | |
chartHHyearOwnRA2015$OWNERSHP <- as.factor(chartHHyearOwnRA2015$OWNERSHP) | |
head(chartHHyearOwnRA2015) | |
chartHH0out <- chartHH0 | |
chartHH0out$HHpct <- round(chartHH0out$HHpct,2) | |
chartHH0out <- chartHH0out %>% filter( OWNERSHP != "0") | |
chartHH0out <- chartHH0out %>% mutate( OwnershipType = plyr::mapvalues(OWNERSHP, c(1,2), c('Owners', 'Renters'))) | |
chartHH0out <- chartHH0out %>% select(YEAR, HH, HHpct, OwnershipType) | |
# chartHH0out %>% tidyr::spread(OwnershipType, HH ) | |
write.csv(chartHH0out, ".\\csv_out\\CA_ownerssh_pct.csv") | |
chartHH0out1 <- chartHH0out %>% select(YEAR, HHpct, OwnershipType) %>% tidyr::spread(OwnershipType, HHpct ) | |
chartHH0out2 <- chartHH0out %>% select(YEAR, HH, OwnershipType) %>% tidyr::spread(OwnershipType, HH ) | |
names(chartHH0out1) <- c("YEAR","OwnersPCT", "RentersPCT") | |
chartHH0outstate <- cbind(chartHH0out1, chartHH0out2) | |
write.csv(chartHH0outstate, ".\\csv_out\\CA_ownerssh_pctWIDE.csv") | |
## race | |
chartHH0out <- chartHHyearOwn %>% filter( OWNERSHP == "1") %>% select(-OWNERSHP) | |
chartHH0out$HHpct <- round(chartHH0out$HHpct,3) | |
# chartHH0out <- chartHH0out %>% filter( OWNERSHP != "0") | |
# chartHH0out <- chartHH0out %>% mutate( OwnershipType = plyr::mapvalues(OWNERSHP, c(1,2), c('Owners', 'Renters'))) | |
chartHH0out <- chartHH0out %>% select(YEAR, HH, HHpct, RACEB) | |
# chartHH0out %>% tidyr::spread(OwnershipType, HH ) | |
write.csv(chartHH0out, ".\\csv_out\\CA_ownerssh_pctRACE.csv") | |
chartHH0out1 <- chartHH0out %>% select(YEAR, HHpct, RACEB) %>% tidyr::spread(RACEB, HHpct ) | |
chartHH0out2 <- chartHH0out %>% select(YEAR, HH, RACEB) %>% tidyr::spread(RACEB, HH ) | |
names(chartHH0out1) <- c("YEAR","ASIANpct", "BLACKpct", "HISPANICpct", "OTHERpct", "WHITEpct") | |
chartHH0outstateRACE <- cbind(chartHH0out1, chartHH0out2) | |
write.csv(chartHH0outstateRACE, ".\\csv_out\\CA_ownerssh_pctRACWIDE.csv") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment