Created
November 1, 2023 14:13
-
-
Save sbfnk/bda1943548a643dee44928113a7ec935 to your computer and use it in GitHub Desktop.
Visualise some aspects of the contact data set
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library("tidyr") | |
| library("dplyr") | |
| library("cowplot") | |
| library("stringi") | |
| library("scales") | |
| library("ggplot2") | |
| library("here") | |
| dt_contacts <- readRDS("contacts.rds") | |
| dir.create(here("figures"), showWarnings = FALSE) | |
| for (type in c("conversational", "physical")) { | |
| ## describe contact data | |
| max_people <- dt_contacts |> | |
| group_by(participant_id, season) |> | |
| summarise(n = n(), .groups = "drop") |> | |
| group_by(participant_id) |> | |
| summarise( | |
| seasons = length(unique(season)), min = min(n), .groups = "drop" | |
| ) |> | |
| filter(seasons == 2) |> | |
| arrange(min) |> | |
| tail(n = 6) |> | |
| pull(participant_id) | |
| p <- ggplot( | |
| dt_contacts |> | |
| filter(participant_id %in% max_people), | |
| aes(x = date, y = .data[[type]], group = season) | |
| ) + | |
| geom_point() + | |
| geom_line() + | |
| scale_color_brewer(palette = "Set1") + | |
| facet_wrap(~participant_id) + | |
| scale_x_date("Season", | |
| breaks = date_breaks("1 year"), labels = date_format("%Y")) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| theme(legend.position = "none") | |
| save_plot( | |
| here("figures", paste0("example_users_", type, ".pdf")), p, | |
| base_aspect_ratio = 2 | |
| ) | |
| save_plot( | |
| here("figures", paste0("example_users_", type, ".png")), p, | |
| base_aspect_ratio = 2 | |
| ) | |
| means <- dt_contacts |> | |
| group_by(participant_id) |> | |
| mutate( | |
| mean = mean(.data[[type]]), | |
| var = var(.data[[type]]), | |
| type = .data[[type]] | |
| ) |> | |
| ungroup() | |
| var_pop <- means |> | |
| summarise( | |
| var = var(mean), | |
| mean = mean(mean), | |
| .groups = "drop" | |
| ) | |
| p <- ggplot(means, aes(x = mean)) + | |
| geom_histogram(binwidth = 1) + | |
| coord_cartesian(xlim = c(0, 25)) + | |
| scale_y_continuous("Number of surveys") + | |
| scale_x_continuous("Mean number of contacts") | |
| save_plot(here("figures", paste0(type, "_dist.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_dist.png")), p) | |
| max_contacts <- | |
| min( | |
| 100, | |
| max( | |
| means |> | |
| filter(mean <= 100, !is.na(var)) |> | |
| pull(mean) | |
| ) | |
| ) | |
| max_meanvar <- max(log10(means$mean), log10(means$var), na.rm = TRUE) | |
| min_meanvar <- min( | |
| means |> | |
| filter(mean > 0) |> | |
| pull(mean) |> | |
| log10(), | |
| means |> | |
| filter(var > 0) |> | |
| pull(var) |> | |
| log10() | |
| ) | |
| p <- ggplot( | |
| means |> | |
| filter(mean > 0, var > 0), | |
| aes(x = log10(mean), y = log10(var)) | |
| ) + | |
| geom_jitter() + | |
| geom_smooth(method = lm, formula = y ~ x) + | |
| expand_limits(x = c(min_meanvar, max_meanvar * 1.1), | |
| y = c(min_meanvar, max_meanvar * 1.1)) + | |
| geom_line(data = data.frame(mean = 10 ** c(min_meanvar, max_meanvar), | |
| var = 10 ** c(min_meanvar, max_meanvar)), | |
| linetype = "dashed") + | |
| geom_point(data = data.frame(var_pop), color = "red") + | |
| scale_x_continuous("Mean (log-scale)") + | |
| scale_y_continuous("Variance (log-scale)") + | |
| ggtitle(paste0("Number of ", type, " contacts")) | |
| save_plot(here("figures", paste0(type, "_contacts_mean_var.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_contacts_mean_var.png")), p) | |
| ## contact with respect to background | |
| bg_means <- dt_contacts |> | |
| group_by(participant_id) |> | |
| mutate( | |
| mean = mean(.data[[type]]), | |
| var = var(.data[[type]]), | |
| type = .data[[type]] | |
| ) |> | |
| ungroup() | |
| max_y <- bg_means |> | |
| pull(mean) |> | |
| quantile(probs = 0.95, na.rm = TRUE) | |
| type_settings <- colnames(dt_contacts) |> | |
| grep(pattern = paste0("^", type, "."), value = TRUE) |> | |
| grep(pattern = "^[^0-9]*$", value = TRUE) | |
| ## setting | |
| formulae <- paste0("~mean(", type_settings, ")") |> | |
| lapply(FUN = as.formula) |> | |
| do.call(what = quos) | |
| bg_type_means <- dt_contacts |> | |
| group_by(participant_id) |> | |
| mutate_at(c(type_settings), mean) |> | |
| pivot_longer( | |
| names_to = "setting", values_to = "mean", type_settings | |
| ) |> | |
| mutate(setting = sub(".*\\.", "", setting)) |> | |
| mutate(setting = ifelse(setting == "work", "work/school", setting)) | |
| max_type_y <- bg_type_means |> | |
| pull(mean) |> | |
| quantile(probs = 0.95, na.rm = TRUE) | |
| ## age | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(age)), | |
| aes(x = agegroup, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Age group") | |
| save_plot(here("figures", paste0(type, "_age.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_age.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(age)), | |
| aes(x = agegroup, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Age group") + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_age_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_age_setting.png")), p) | |
| ## education | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(highest.education)), | |
| aes(x = highest.education, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Highest education level", | |
| labels = c("None", "GCSE", "A-levels", "BSc", "MSc", "Student") | |
| ) | |
| save_plot(here("figures", paste0(type, "_education.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_education.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(highest.education)), | |
| aes(x = highest.education, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Highest education level", | |
| labels = c("None", "GCSE", "A-levels", "BSc", "MSc", "Student") | |
| ) + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_education_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_education_setting.png")), p) | |
| ## students | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(education.stillin)), | |
| aes(x = education.stillin, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("In education", labels = c("No", "Yes")) | |
| save_plot(here("figures", paste0(type, "_students.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_students.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(education.stillin)), | |
| aes(x = education.stillin, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("In education", labels = c("No", "Yes")) + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_students_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_students_setting.png")), p) | |
| ## rural/urban? postcode? | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(urban.rural)), | |
| aes(x = urban.rural, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Settlement type") | |
| save_plot(here("figures", paste0(type, "_settlement.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_settlement.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(urban.rural)), | |
| aes(x = urban.rural, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Settlement type") + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_settlement_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_settlement_setting.png")), p) | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(work.urban.rural)), | |
| aes(x = work.urban.rural, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Work settlement type") | |
| save_plot(here("figures", paste0(type, "_work_settlement.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_work_settlement.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(work.urban.rural)), | |
| aes(x = work.urban.rural, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Work settlement type") + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_work_settlement_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_work_settlement_setting.png")), p) | |
| ## main activity | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(main.activity)), | |
| aes(x = main.activity, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Main activity", | |
| labels = c( | |
| "Full time employed", | |
| "Part-time employed", | |
| "Self-employed", | |
| "School", | |
| "Home-maker", | |
| "Unemployed", | |
| "Long-term leave", | |
| "Retired", | |
| "Other" | |
| ) | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) | |
| save_plot(here("figures", paste0(type, "_main_activity.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_main_activity.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(main.activity)), | |
| aes(x = main.activity, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Main activity", | |
| labels = c( | |
| "Full time employed", | |
| "Part-time employed", | |
| "Self-employed", | |
| "School", | |
| "Home-maker", | |
| "Unemployed", | |
| "Long-term leave", | |
| "Retired", | |
| "Other" | |
| ) | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_main_activity_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_main_activity_setting.png")), p) | |
| ## occupation | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(occupation)), | |
| aes(x = occupation, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Occupation", | |
| labels = c( | |
| "Professional", "Office worker", | |
| "Retail", "Skilled manual", | |
| "Other manual", "Other" | |
| ) | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) | |
| save_plot(here("figures", paste0(type, "_occupation.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_occupation.png")), p) | |
| ## household size? | |
| bg_means <- bg_means |> | |
| mutate( | |
| hh_group = factor( | |
| ifelse(nb.household < 6, as.character(nb.household), "6+"), | |
| levels = c(as.character(seq_len(6)-1), "6+") | |
| ) | |
| ) | |
| ## household size? | |
| bg_type_means <- bg_type_means |> | |
| mutate( | |
| hh_group = factor( | |
| ifelse(nb.household < 6, as.character(nb.household), "6+"), | |
| levels = c(as.character(seq_len(6)-1), "6+") | |
| ) | |
| ) | |
| p <- ggplot(bg_means, aes(x = hh_group, y = mean)) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Household size") | |
| save_plot(here("figures", paste0(type, "_hh_size.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_hh_size.png")), p) | |
| p <- ggplot(bg_type_means, aes(x = hh_group, y = mean, color = setting)) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Household size") + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_hh_size_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_hh_size_setting.png")), p) | |
| bg_means <- bg_means |> | |
| mutate( | |
| hh_children_group = factor( | |
| ifelse( | |
| nb.household.children < 6, | |
| as.character(nb.household.children), | |
| "6+" | |
| ), | |
| levels = c(as.character(seq_len(6) - 1), "6+") | |
| ) | |
| ) | |
| bg_type_means <- bg_type_means |> | |
| mutate( | |
| hh_children_group = factor( | |
| ifelse( | |
| nb.household.children < 6, | |
| as.character(nb.household.children), "6+"), | |
| levels = c(as.character(seq_len(6) - 1), "6+") | |
| ) | |
| ) | |
| p <- ggplot(bg_means, aes(x = hh_children_group, y = mean)) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Number of children in the household") | |
| save_plot(here("figures", paste0(type, "_hh_size_children.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_hh_size_children.png")), p) | |
| p <- ggplot( | |
| bg_type_means, | |
| aes(x = hh_children_group, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete("Number of children in the household") + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_hh_size_children_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_hh_size_children_setting.png")), p) | |
| ## regional differences | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(country)), | |
| aes(x = country, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Country", | |
| labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$country))) | |
| ) | |
| save_plot(here("figures", paste0(type, "_country.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_country.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(country)), | |
| aes(x = country, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Country", | |
| labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$country))) | |
| ) + | |
| scale_color_brewer("", palette = "Set1") + | |
| theme(legend.position = "top") | |
| save_plot(here("figures", paste0(type, "_country_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_country_setting.png")), p) | |
| p <- ggplot( | |
| bg_means |> | |
| filter(!is.na(region)), | |
| aes(x = region, y = mean) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Region", | |
| labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$region))) | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1)) | |
| save_plot(here("figures", paste0(type, "_region.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_region.png")), p) | |
| p <- ggplot( | |
| bg_type_means |> | |
| filter(!is.na(region)), | |
| aes(x = region, y = mean, color = setting) | |
| ) + | |
| geom_boxplot() + | |
| coord_cartesian(ylim = c(0, max_y)) + | |
| scale_y_continuous(paste0("Number of ", type, " contacts")) + | |
| scale_x_discrete( | |
| "Region", | |
| labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$region))) | |
| ) + | |
| theme( | |
| axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), | |
| legend.position = "top" | |
| ) + | |
| scale_color_brewer("", palette = "Set1") | |
| save_plot(here("figures", paste0(type, "_region_setting.pdf")), p) | |
| save_plot(here("figures", paste0(type, "_region_setting.png")), p) | |
| } | |
| means <- dt_contacts |> | |
| group_by(participant_id) |> | |
| mutate( | |
| mean.physical = mean(physical), | |
| mean.conversational = mean(conversational) | |
| ) |> | |
| ungroup() |> | |
| pivot_longer( | |
| names_to = "type", values_to = "contacts", starts_with("mean.") | |
| ) |> | |
| mutate(type = sub("mean.", "", type)) | |
| p <- ggplot(means, aes(x = contacts, fill = type)) + | |
| geom_histogram(binwidth = 1, position = "stack") + | |
| scale_fill_brewer("", palette = "Set1") + | |
| coord_cartesian(xlim = c(0, 25)) + | |
| scale_y_continuous("Number of surveys") + | |
| scale_x_continuous("Mean number of contacts") + | |
| theme(legend.position = "bottom") | |
| save_plot(here("figures", paste0("contact_dist.pdf")), p) | |
| save_plot(here("figures", paste0("contact_dist.png")), p) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment