Created
October 13, 2019 14:32
-
-
Save johnburnmurdoch/0054858c00a2bc778c77e8586e9d234c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
needs(sjlabelled, tidyverse, haven, magrittr) | |
# Folder holding the extracted UKDA-6614 Stata release; each wave lives in
# its own `ukhls_w<N>` sub-folder.
usoc_dir <- "~/Downloads/UKDA-6614-stata/stata11_se"

# Read the individual-response (`indresp`) file of one wave.
#
# `wave` is the wave number (1, 2, ...). Files are prefixed with the matching
# lower-case letter (wave 1 -> "a_indresp.dta", wave 8 -> "h_indresp.dta").
# Returns the tibble produced by `read_dta()`, read with latin1 encoding.
read_indresp <- function(wave) {
  stopifnot(length(wave) == 1, wave >= 1, wave <= length(letters))
  read_dta(
    file.path(
      usoc_dir,
      paste0("ukhls_w", wave),
      paste0(letters[wave], "_indresp.dta")
    ),
    encoding = "latin1"
  )
}

# Load wave 8
USoc_indresp_8 <- read_indresp(8)

# Load all other waves
USoc_indresp_1 <- read_indresp(1)
USoc_indresp_2 <- read_indresp(2)
USoc_indresp_3 <- read_indresp(3)
USoc_indresp_4 <- read_indresp(4)
USoc_indresp_5 <- read_indresp(5)
USoc_indresp_6 <- read_indresp(6)
USoc_indresp_7 <- read_indresp(7)
# Weighted cross-tab of "ever reported a driving licence" x wave-8 vote.
#
# Built once and reused by both tables below, so the two views cannot drift
# apart. Steps:
#   1. Take id, licence answer, vote and weight from wave 8 and attach the
#      licence answer from every earlier wave (matched on `pidp`).
#   2. Pivot the eight `*_drive` columns long: one row per person per wave.
#   3. Collapse to one status per person: "Yes" if code 1 appears in any
#      wave, otherwise "No" if code 2 appears in any wave, otherwise NA.
#      (Naively assumes nobody has lost their licence.)
#   4. Drop people with no valid answer in any wave and tally the rest with
#      the `h_indinub_xw` weights (per Sturgis and Jennings
#      https://www.sciencedirect.com/science/article/pii/S026137941930071X?via%3Dihub)
licence_vote_counts <- USoc_indresp_8 %>%
  dplyr::select(pidp, h_drive, h_vote8, h_indinub_xw) %>%
  left_join(dplyr::select(USoc_indresp_1, pidp, a_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_2, pidp, b_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_3, pidp, c_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_4, pidp, d_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_5, pidp, e_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_6, pidp, f_drive), by = "pidp") %>%
  left_join(dplyr::select(USoc_indresp_7, pidp, g_drive), by = "pidp") %>%
  mutate(
    # Use party names instead of codes
    h_vote8 = as_label(h_vote8),
    # Plain numeric codes, so the per-wave columns stack without
    # label-attribute conflicts
    across(ends_with("_drive"), as.numeric)
  ) %>%
  # Select the licence columns by name rather than by position
  pivot_longer(ends_with("_drive"), names_to = "wave", values_to = "drive") %>%
  group_by(pidp, h_indinub_xw, h_vote8) %>%
  summarise(
    drive = if (1 %in% drive) {
      "Yes"
    } else if (2 %in% drive) {
      "No"
    } else {
      # No valid answer in any wave: keep as missing instead of
      # silently counting the person as "No"
      NA_character_
    }
  ) %>%
  ungroup() %>%
  # Filter out missing data for driving licence
  filter(!is.na(drive)) %>%
  # Factor keeps "Yes" ordered before "No", as the numeric codes did
  mutate(drive = factor(drive, levels = c("Yes", "No"))) %>%
  count(drive, h_vote8, wt = h_indinub_xw)

# Driving licence by vote: share of each party's voters with / without a
# licence, one row per party, sorted by % with a licence.
licence_vote_counts %>%
  group_by(h_vote8) %>%
  # Convert weighted numbers to shares
  mutate(share = n / sum(n) * 100) %>%
  # Drop rows whose share is undefined (e.g. a zero weighted total)
  filter(is.finite(share)) %>%
  # Filter out parties with tiny samples, and missing GE vote data
  filter(sum(n) > 96 & !h_vote8 %in% c("inapplicable", "refusal")) %>%
  # Report the party's total weighted count alongside the shares
  mutate(n = sum(n)) %>%
  ungroup() %>%
  # Spread wide again: one "Yes" and one "No" share column
  pivot_wider(names_from = drive, values_from = share) %>%
  arrange(desc(Yes))

# As above, but doing vote by driving licence, instead of driving licence by
# vote: share of each vote choice among licence holders / non-holders.
licence_vote_counts %>%
  filter(!h_vote8 %in% c("inapplicable", "refusal")) %>%
  group_by(drive) %>%
  mutate(share = n / sum(n) * 100) %>%
  ungroup() %>%
  View()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment