greimel · April 22, 2020 09:46
diff --git a/joining.R b/joining.R
 library("haven")
 library("data.table")
 #library("ggplot2")
 wd <- getwd()
 #setwd("scf-data")
 #setwd(wd)
 ### SCF 1989--2016
 scf89 <- read_dta("raw-data-2018/rscfp1989.dta")
 scf92 <- read_dta("raw-data-2018/rscfp1992.dta")
 scf95 <- read_dta("raw-data-2018/rscfp1995.dta")
 scf98 <- read_dta("raw-data-2018/rscfp1998.dta")
 scf01 <- read_dta("raw-data-2018/rscfp2001.dta")
 scf04 <- read_dta("raw-data-2018/rscfp2004.dta")
 scf07 <- read_dta("raw-data-2018/rscfp2007.dta")
 scf10 <- read_dta("raw-data-2018/rscfp2010.dta")
 scf13 <- read_dta("raw-data-2018/rscfp2013.dta")
 scf16 <- read_dta("raw-data-2018/rscfp2016.dta")


 ### SCF 1983
 scf83b <- data.table(read_dta("raw-data-2018/scf83b.dta"))
 old = c("b3305", "b3320", "v1", "b3005", "b3201")
 new = c("asset", "debt", "Y1", "wgt", "income")

 old2 = c("b3705",        "b3708",  "b4001",   "b3802",           "b3602")
 new2 = c("monthly rent", "houses", "NH_MORT",  "other mortgages", "land contracts/notes")

 old3 <- c("b3814", "b3831", "b3848", "b3815", "b3832", "b3849")
 new3 <- c("paym1", "paym2", "paym3", "freq1", "freq2", "freq3")
 ## original mortgage, year purchased, purchase price, available

 old4 <- c("b3101",    "b3102",      "b3104",       "b3109",       "b3112",   "b3108",      "b3107")
 new4 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")
 setnames(scf83b, old, new)
 setnames(scf83b, old2, new2)
 setnames(scf83b, old3, new3)
 setnames(scf83b, old4, new4)

 scf83b[, resdbt := `other mortgages` + `land contracts/notes`]

 ## frequency: 5 ... monthly, 6 ... annually
 scf83b[, PAYMORT1 := 0]
 scf83b[freq1 == 5, PAYMORT1 := paym1]
 scf83b[freq1 == 6, PAYMORT1 := paym1 / 12]
 scf83b[, PAYMORT2 := 0]
 scf83b[freq2 == 5, PAYMORT2 := paym2]
 scf83b[freq2 == 6, PAYMORT2 := paym2 / 12]
 scf83b[, PAYMORT3 := 0]
 scf83b[freq3 == 5, PAYMORT3 := paym3]
 scf83b[freq3 == 6, PAYMORT3 := paym3 / 12]
 scf83b[, paymorto := 0]

 paynames = c(paste0("PAYMORT", 1:3), "paymorto")

 scf83b[, kids := kids_total - kids_away]
 scf83b[, adults := no_18_plus + no_18_minus - kids]

 scf83 = scf83b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new, new2,
                                                              "resdbt", paynames,
                                                              "kids", "adults")]
 scf83[, networth := asset - debt]
 scf83[, YY1 := Y1]

 ### SCF 1986
 scf86b <- data.table(read_dta("raw-data-2018/scf86b.dta"))

 old_86 <- c("c1449", "c1455",   "c1457", "c1",  "c1012",  "b1", "c1009",   "c1301") 
 new_86 <- c("asset", "debt", "networth", "Y1", "wgt", "Y1_83", "wgt_83", "income")

 old_83 <- c("c1450", "c1456", "c1458")
 new_83 <- paste0(new_86[1:3], "83")


 old2_86 <- c("c1506",        "c1512",  "c1525",   "c1423")
 new2_86 <- c("monthly rent", "houses", "NH_MORT", "other mortgages")

 old2_83 <- c(                "c1513",  "c1526",   "c1424")
 new2_83 <- paste0(new2_86[2:4], "83")

 old3_86 <- c("c1535", "c1550", "c1536", "c1551")
 new3_86 <- c("paym1", "paym2", "freq1", "freq2")
 ## original mortgage, year purchased, purchase price, available

 #old4_83<- c("b3101",    "b3102",      "b3104",       "b3109",       "c1125",   "b3108",      "b3107")
 old4_86 <- c("c1101",    "c1102",      "c1104",       "c1112",       "c1126",   "c1111",      "c1110")
 new4_86 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")

 ## marital == 1 ... married, 6 and 7 ... life with spouse
 ## c1104 ... persons 18-
 ## c1107 ... (grand-)children 18+
 setnames(scf86b, old_86, new_86)
 setnames(scf86b, old2_86, new2_86)
 setnames(scf86b, old3_86, new3_86)
 setnames(scf86b, old4_86, new4_86)

 setnames(scf86b, old_83,  new_83)
 setnames(scf86b, old2_83, new2_83)

 scf86b[, resdbt := `other mortgages`]

 ## frequency: 5 ... monthly, 6 ... annually
 scf86b[, PAYMORT1 := 0]
 scf86b[freq1 == 5, PAYMORT1 := paym1]
 scf86b[freq1 == 6, PAYMORT1 := paym1 / 12]
 scf86b[, PAYMORT2 := 0]
 scf86b[freq2 == 5, PAYMORT2 := paym2]
 scf86b[freq2 == 6, PAYMORT2 := paym2 / 12]
 scf86b[, PAYMORT3 := 0]
 scf86b[, paymorto := 0]

 paynames = c(paste0("PAYMORT", 1:3), "paymorto")

 scf86b[, kids := no_18_minus + c1107]
 scf86b[, adults := fam_size - kids]

 scf86 = scf86b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new_83, new_86, new2_83, new2_86,
                                                              "resdbt", paynames,
                                                              "kids", "adults")]

 scf86[, YY1 := Y1]


 ### note: 1983 and 1986 are in original dollars, since 1989 in 2016 dollars
 library(quantmod)
 getSymbols('CPIAUCSL',src='FRED')
 cpi_xts = apply.yearly(CPIAUCSL, colMeans)
 cpi.dt = data.table(year = year(time(cpi_xts)), cpi = coredata(cpi_xts))
 setnames(cpi.dt, "cpi.CPIAUCSL", "cpi")
 cpi.dt[, cpi_2016 := cpi/cpi[year==2016][1L]]

 cpi_factor_1983 = cpi.dt[year==1983, cpi_2016]
 cpi_factor_1986 = cpi.dt[year==1986, cpi_2016]

 ## scale all numeric variables
 cols <- colnames(scf83)[!colnames(scf83) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
 scf83[, (cols) := lapply(.SD, function(x) x/cpi_factor_1983), .SDcols=cols]
 cols <- colnames(scf86)[!colnames(scf86) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
 scf86[, (cols) := lapply(.SD, function(x) x/cpi_factor_1986), .SDcols=cols]

 ### Combining the data sets
 datasets = list(scf89, scf92, scf95, scf98, scf01, scf04, scf07, scf10, scf13, scf16, scf83, scf86)
 years = c(seq(from=1989, to=2016, by=3), 1983, 1986)

 dt = data.table(datasets[[1]])
 dt[, year := years[1]]
 ## Rename 1989 version, it uses X instead of Y
 setnames(dt, c("X1", "XX1"), c("Y1", "YY1"))
 ## add years to the identifiers
 dt[, Z1 := paste0(Y1,"_",year)]
 dt[, ZZ1 := paste0(YY1,"_",year)]
 ## set identifiers as keys
 setkeyv(dt, c("Z1", "ZZ1"))

 for (i in 2:length(years)) {
    dt_next = data.table(datasets[[i]])
    dt_next[, year := years[i]]
    dt_next[, Z1 := paste0(Y1,"_",year)]
    dt_next[, ZZ1 := paste0(YY1,"_",year)]
    setkeyv(dt_next, c("Z1", "ZZ1"))
    dt = rbind(dt, dt_next, fill=TRUE)
 }

 scf = dt
 save(scf, file="cleaned-data/scf_1983_2016.RData")
 #write_dta(scf, "cleaned-data/scf_1983_2016.dta")
	library("haven")
	library("data.table")
	#library("ggplot2")
	wd <- getwd()
	#setwd("scf-data")
	#setwd(wd)
	### SCF 1989--2016
	scf89 <- read_dta("raw-data-2018/rscfp1989.dta")
	scf92 <- read_dta("raw-data-2018/rscfp1992.dta")
	scf95 <- read_dta("raw-data-2018/rscfp1995.dta")
	scf98 <- read_dta("raw-data-2018/rscfp1998.dta")
	scf01 <- read_dta("raw-data-2018/rscfp2001.dta")
	scf04 <- read_dta("raw-data-2018/rscfp2004.dta")
	scf07 <- read_dta("raw-data-2018/rscfp2007.dta")
	scf10 <- read_dta("raw-data-2018/rscfp2010.dta")
	scf13 <- read_dta("raw-data-2018/rscfp2013.dta")
	scf16 <- read_dta("raw-data-2018/rscfp2016.dta")


	### SCF 1983
	scf83b <- data.table(read_dta("raw-data-2018/scf83b.dta"))
	old = c("b3305", "b3320", "v1", "b3005", "b3201")
	new = c("asset", "debt", "Y1", "wgt", "income")

	old2 = c("b3705", "b3708", "b4001", "b3802", "b3602")
	new2 = c("monthly rent", "houses", "NH_MORT", "other mortgages", "land contracts/notes")

	old3 <- c("b3814", "b3831", "b3848", "b3815", "b3832", "b3849")
	new3 <- c("paym1", "paym2", "paym3", "freq1", "freq2", "freq3")
	## original mortgage, year purchased, purchase price, available

	old4 <- c("b3101", "b3102", "b3104", "b3109", "b3112", "b3108", "b3107")
	new4 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")
	setnames(scf83b, old, new)
	setnames(scf83b, old2, new2)
	setnames(scf83b, old3, new3)
	setnames(scf83b, old4, new4)

	scf83b[, resdbt := `other mortgages` + `land contracts/notes`]

	## frequency: 5 ... monthly, 6 ... annually
	scf83b[, PAYMORT1 := 0]
	scf83b[freq1 == 5, PAYMORT1 := paym1]
	scf83b[freq1 == 6, PAYMORT1 := paym1 / 12]
	scf83b[, PAYMORT2 := 0]
	scf83b[freq2 == 5, PAYMORT2 := paym2]
	scf83b[freq2 == 6, PAYMORT2 := paym2 / 12]
	scf83b[, PAYMORT3 := 0]
	scf83b[freq3 == 5, PAYMORT3 := paym3]
	scf83b[freq3 == 6, PAYMORT3 := paym3 / 12]
	scf83b[, paymorto := 0]

	paynames = c(paste0("PAYMORT", 1:3), "paymorto")

	scf83b[, kids := kids_total - kids_away]
	scf83b[, adults := no_18_plus + no_18_minus - kids]

	scf83 = scf83b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new, new2,
	"resdbt", paynames,
	"kids", "adults")]
	scf83[, networth := asset - debt]
	scf83[, YY1 := Y1]

	### SCF 1986
	scf86b <- data.table(read_dta("raw-data-2018/scf86b.dta"))

	old_86 <- c("c1449", "c1455", "c1457", "c1", "c1012", "b1", "c1009", "c1301")
	new_86 <- c("asset", "debt", "networth", "Y1", "wgt", "Y1_83", "wgt_83", "income")

	old_83 <- c("c1450", "c1456", "c1458")
	new_83 <- paste0(new_86[1:3], "83")


	old2_86 <- c("c1506", "c1512", "c1525", "c1423")
	new2_86 <- c("monthly rent", "houses", "NH_MORT", "other mortgages")

	old2_83 <- c( "c1513", "c1526", "c1424")
	new2_83 <- paste0(new2_86[2:4], "83")

	old3_86 <- c("c1535", "c1550", "c1536", "c1551")
	new3_86 <- c("paym1", "paym2", "freq1", "freq2")
	## original mortgage, year purchased, purchase price, available

	#old4_83<- c("b3101", "b3102", "b3104", "b3109", "c1125", "b3108", "b3107")
	old4_86 <- c("c1101", "c1102", "c1104", "c1112", "c1126", "c1111", "c1110")
	new4_86 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")

	## marital == 1 ... married, 6 and 7 ... life with spouse
	## c1104 ... persons 18-
	## c1107 ... (grand-)children 18+
	setnames(scf86b, old_86, new_86)
	setnames(scf86b, old2_86, new2_86)
	setnames(scf86b, old3_86, new3_86)
	setnames(scf86b, old4_86, new4_86)

	setnames(scf86b, old_83, new_83)
	setnames(scf86b, old2_83, new2_83)

	scf86b[, resdbt := `other mortgages`]

	## frequency: 5 ... monthly, 6 ... annually
	scf86b[, PAYMORT1 := 0]
	scf86b[freq1 == 5, PAYMORT1 := paym1]
	scf86b[freq1 == 6, PAYMORT1 := paym1 / 12]
	scf86b[, PAYMORT2 := 0]
	scf86b[freq2 == 5, PAYMORT2 := paym2]
	scf86b[freq2 == 6, PAYMORT2 := paym2 / 12]
	scf86b[, PAYMORT3 := 0]
	scf86b[, paymorto := 0]

	paynames = c(paste0("PAYMORT", 1:3), "paymorto")

	scf86b[, kids := no_18_minus + c1107]
	scf86b[, adults := fam_size - kids]

	scf86 = scf86b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new_83, new_86, new2_83, new2_86,
	"resdbt", paynames,
	"kids", "adults")]

	scf86[, YY1 := Y1]


	### note: 1983 and 1986 are in original dollars, since 1989 in 2016 dollars
	library(quantmod)
	getSymbols('CPIAUCSL',src='FRED')
	cpi_xts = apply.yearly(CPIAUCSL, colMeans)
	cpi.dt = data.table(year = year(time(cpi_xts)), cpi = coredata(cpi_xts))
	setnames(cpi.dt, "cpi.CPIAUCSL", "cpi")
	cpi.dt[, cpi_2016 := cpi/cpi[year==2016][1L]]

	cpi_factor_1983 = cpi.dt[year==1983, cpi_2016]
	cpi_factor_1986 = cpi.dt[year==1986, cpi_2016]

	## scale all numeric variables
	cols <- colnames(scf83)[!colnames(scf83) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
	scf83[, (cols) := lapply(.SD, function(x) x/cpi_factor_1983), .SDcols=cols]
	cols <- colnames(scf86)[!colnames(scf86) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
	scf86[, (cols) := lapply(.SD, function(x) x/cpi_factor_1986), .SDcols=cols]

	### Combining the data sets
	datasets = list(scf89, scf92, scf95, scf98, scf01, scf04, scf07, scf10, scf13, scf16, scf83, scf86)
	years = c(seq(from=1989, to=2016, by=3), 1983, 1986)

	dt = data.table(datasets[[1]])
	dt[, year := years[1]]
	## Rename 1989 version, it uses X instead of Y
	setnames(dt, c("X1", "XX1"), c("Y1", "YY1"))
	## add years to the identifiers
	dt[, Z1 := paste0(Y1,"_",year)]
	dt[, ZZ1 := paste0(YY1,"_",year)]
	## set identifiers as keys
	setkeyv(dt, c("Z1", "ZZ1"))

	for (i in 2:length(years)) {
	dt_next = data.table(datasets[[i]])
	dt_next[, year := years[i]]
	dt_next[, Z1 := paste0(Y1,"_",year)]
	dt_next[, ZZ1 := paste0(YY1,"_",year)]
	setkeyv(dt_next, c("Z1", "ZZ1"))
	dt = rbind(dt, dt_next, fill=TRUE)
	}

	scf = dt
	save(scf, file="cleaned-data/scf_1983_2016.RData")
	#write_dta(scf, "cleaned-data/scf_1983_2016.dta")
No results found