Created
April 22, 2020 09:46
-
-
Save greimel/76ce72f6c95153b88dfc329f55dfec9c to your computer and use it in GitHub Desktop.
cleaning SCF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("haven") | |
library("data.table") | |
#library("ggplot2") | |
wd <- getwd() | |
#setwd("scf-data") | |
#setwd(wd) | |
### SCF 1989--2016 | |
scf89 <- read_dta("raw-data-2018/rscfp1989.dta") | |
scf92 <- read_dta("raw-data-2018/rscfp1992.dta") | |
scf95 <- read_dta("raw-data-2018/rscfp1995.dta") | |
scf98 <- read_dta("raw-data-2018/rscfp1998.dta") | |
scf01 <- read_dta("raw-data-2018/rscfp2001.dta") | |
scf04 <- read_dta("raw-data-2018/rscfp2004.dta") | |
scf07 <- read_dta("raw-data-2018/rscfp2007.dta") | |
scf10 <- read_dta("raw-data-2018/rscfp2010.dta") | |
scf13 <- read_dta("raw-data-2018/rscfp2013.dta") | |
scf16 <- read_dta("raw-data-2018/rscfp2016.dta") | |
### SCF 1983 | |
scf83b <- data.table(read_dta("raw-data-2018/scf83b.dta")) | |
old = c("b3305", "b3320", "v1", "b3005", "b3201") | |
new = c("asset", "debt", "Y1", "wgt", "income") | |
old2 = c("b3705", "b3708", "b4001", "b3802", "b3602") | |
new2 = c("monthly rent", "houses", "NH_MORT", "other mortgages", "land contracts/notes") | |
old3 <- c("b3814", "b3831", "b3848", "b3815", "b3832", "b3849") | |
new3 <- c("paym1", "paym2", "paym3", "freq1", "freq2", "freq3") | |
## original mortgage, year purchased, purchase price, available | |
old4 <- c("b3101", "b3102", "b3104", "b3109", "b3112", "b3108", "b3107") | |
new4 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away") | |
setnames(scf83b, old, new) | |
setnames(scf83b, old2, new2) | |
setnames(scf83b, old3, new3) | |
setnames(scf83b, old4, new4) | |
scf83b[, resdbt := `other mortgages` + `land contracts/notes`] | |
## frequency: 5 ... monthly, 6 ... annually | |
scf83b[, PAYMORT1 := 0] | |
scf83b[freq1 == 5, PAYMORT1 := paym1] | |
scf83b[freq1 == 6, PAYMORT1 := paym1 / 12] | |
scf83b[, PAYMORT2 := 0] | |
scf83b[freq2 == 5, PAYMORT2 := paym2] | |
scf83b[freq2 == 6, PAYMORT2 := paym2 / 12] | |
scf83b[, PAYMORT3 := 0] | |
scf83b[freq3 == 5, PAYMORT3 := paym3] | |
scf83b[freq3 == 6, PAYMORT3 := paym3 / 12] | |
scf83b[, paymorto := 0] | |
paynames = c(paste0("PAYMORT", 1:3), "paymorto") | |
scf83b[, kids := kids_total - kids_away] | |
scf83b[, adults := no_18_plus + no_18_minus - kids] | |
scf83 = scf83b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new, new2, | |
"resdbt", paynames, | |
"kids", "adults")] | |
scf83[, networth := asset - debt] | |
scf83[, YY1 := Y1] | |
### SCF 1986 | |
scf86b <- data.table(read_dta("raw-data-2018/scf86b.dta")) | |
old_86 <- c("c1449", "c1455", "c1457", "c1", "c1012", "b1", "c1009", "c1301") | |
new_86 <- c("asset", "debt", "networth", "Y1", "wgt", "Y1_83", "wgt_83", "income") | |
old_83 <- c("c1450", "c1456", "c1458") | |
new_83 <- paste0(new_86[1:3], "83") | |
old2_86 <- c("c1506", "c1512", "c1525", "c1423") | |
new2_86 <- c("monthly rent", "houses", "NH_MORT", "other mortgages") | |
old2_83 <- c( "c1513", "c1526", "c1424") | |
new2_83 <- paste0(new2_86[2:4], "83") | |
old3_86 <- c("c1535", "c1550", "c1536", "c1551") | |
new3_86 <- c("paym1", "paym2", "freq1", "freq2") | |
## original mortgage, year purchased, purchase price, available | |
#old4_83<- c("b3101", "b3102", "b3104", "b3109", "c1125", "b3108", "b3107") | |
old4_86 <- c("c1101", "c1102", "c1104", "c1112", "c1126", "c1111", "c1110") | |
new4_86 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away") | |
## marital == 1 ... married, 6 and 7 ... life with spouse | |
## c1104 ... persons 18- | |
## c1107 ... (grand-)children 18+ | |
setnames(scf86b, old_86, new_86) | |
setnames(scf86b, old2_86, new2_86) | |
setnames(scf86b, old3_86, new3_86) | |
setnames(scf86b, old4_86, new4_86) | |
setnames(scf86b, old_83, new_83) | |
setnames(scf86b, old2_83, new2_83) | |
scf86b[, resdbt := `other mortgages`] | |
## frequency: 5 ... monthly, 6 ... annually | |
scf86b[, PAYMORT1 := 0] | |
scf86b[freq1 == 5, PAYMORT1 := paym1] | |
scf86b[freq1 == 6, PAYMORT1 := paym1 / 12] | |
scf86b[, PAYMORT2 := 0] | |
scf86b[freq2 == 5, PAYMORT2 := paym2] | |
scf86b[freq2 == 6, PAYMORT2 := paym2 / 12] | |
scf86b[, PAYMORT3 := 0] | |
scf86b[, paymorto := 0] | |
paynames = c(paste0("PAYMORT", 1:3), "paymorto") | |
scf86b[, kids := no_18_minus + c1107] | |
scf86b[, adults := fam_size - kids] | |
scf86 = scf86b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new_83, new_86, new2_83, new2_86, | |
"resdbt", paynames, | |
"kids", "adults")] | |
scf86[, YY1 := Y1] | |
### note: 1983 and 1986 are in original dollars, since 1989 in 2016 dollars | |
library(quantmod) | |
getSymbols('CPIAUCSL',src='FRED') | |
cpi_xts = apply.yearly(CPIAUCSL, colMeans) | |
cpi.dt = data.table(year = year(time(cpi_xts)), cpi = coredata(cpi_xts)) | |
setnames(cpi.dt, "cpi.CPIAUCSL", "cpi") | |
cpi.dt[, cpi_2016 := cpi/cpi[year==2016][1L]] | |
cpi_factor_1983 = cpi.dt[year==1983, cpi_2016] | |
cpi_factor_1986 = cpi.dt[year==1986, cpi_2016] | |
## scale all numeric variables | |
cols <- colnames(scf83)[!colnames(scf83) %in% c("Y1", "wgt", "YY1", "kids", "adults")] | |
scf83[, (cols) := lapply(.SD, function(x) x/cpi_factor_1983), .SDcols=cols] | |
cols <- colnames(scf86)[!colnames(scf86) %in% c("Y1", "wgt", "YY1", "kids", "adults")] | |
scf86[, (cols) := lapply(.SD, function(x) x/cpi_factor_1986), .SDcols=cols] | |
### Combining the data sets | |
datasets = list(scf89, scf92, scf95, scf98, scf01, scf04, scf07, scf10, scf13, scf16, scf83, scf86) | |
years = c(seq(from=1989, to=2016, by=3), 1983, 1986) | |
dt = data.table(datasets[[1]]) | |
dt[, year := years[1]] | |
## Rename 1989 version, it uses X instead of Y | |
setnames(dt, c("X1", "XX1"), c("Y1", "YY1")) | |
## add years to the identifiers | |
dt[, Z1 := paste0(Y1,"_",year)] | |
dt[, ZZ1 := paste0(YY1,"_",year)] | |
## set identifiers as keys | |
setkeyv(dt, c("Z1", "ZZ1")) | |
for (i in 2:length(years)) { | |
dt_next = data.table(datasets[[i]]) | |
dt_next[, year := years[i]] | |
dt_next[, Z1 := paste0(Y1,"_",year)] | |
dt_next[, ZZ1 := paste0(YY1,"_",year)] | |
setkeyv(dt_next, c("Z1", "ZZ1")) | |
dt = rbind(dt, dt_next, fill=TRUE) | |
} | |
scf = dt | |
save(scf, file="cleaned-data/scf_1983_2016.RData") | |
#write_dta(scf, "cleaned-data/scf_1983_2016.dta") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment