Skip to content

Instantly share code, notes, and snippets.

@sbfnk
Created November 1, 2023 14:13
Show Gist options
  • Save sbfnk/bda1943548a643dee44928113a7ec935 to your computer and use it in GitHub Desktop.
Save sbfnk/bda1943548a643dee44928113a7ec935 to your computer and use it in GitHub Desktop.
Visualise some aspects of the contact data set
library("tidyr")
library("dplyr")
library("cowplot")
library("stringi")
library("scales")
library("ggplot2")
library("here")
dt_contacts <- readRDS("contacts.rds")
dir.create(here("figures"), showWarnings = FALSE)
for (type in c("conversational", "physical")) {
## describe contact data
max_people <- dt_contacts |>
group_by(participant_id, season) |>
summarise(n = n(), .groups = "drop") |>
group_by(participant_id) |>
summarise(
seasons = length(unique(season)), min = min(n), .groups = "drop"
) |>
filter(seasons == 2) |>
arrange(min) |>
tail(n = 6) |>
pull(participant_id)
p <- ggplot(
dt_contacts |>
filter(participant_id %in% max_people),
aes(x = date, y = .data[[type]], group = season)
) +
geom_point() +
geom_line() +
scale_color_brewer(palette = "Set1") +
facet_wrap(~participant_id) +
scale_x_date("Season",
breaks = date_breaks("1 year"), labels = date_format("%Y")) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
theme(legend.position = "none")
save_plot(
here("figures", paste0("example_users_", type, ".pdf")), p,
base_aspect_ratio = 2
)
save_plot(
here("figures", paste0("example_users_", type, ".png")), p,
base_aspect_ratio = 2
)
means <- dt_contacts |>
group_by(participant_id) |>
mutate(
mean = mean(.data[[type]]),
var = var(.data[[type]]),
type = .data[[type]]
) |>
ungroup()
var_pop <- means |>
summarise(
var = var(mean),
mean = mean(mean),
.groups = "drop"
)
p <- ggplot(means, aes(x = mean)) +
geom_histogram(binwidth = 1) +
coord_cartesian(xlim = c(0, 25)) +
scale_y_continuous("Number of surveys") +
scale_x_continuous("Mean number of contacts")
save_plot(here("figures", paste0(type, "_dist.pdf")), p)
save_plot(here("figures", paste0(type, "_dist.png")), p)
max_contacts <-
min(
100,
max(
means |>
filter(mean <= 100, !is.na(var)) |>
pull(mean)
)
)
max_meanvar <- max(log10(means$mean), log10(means$var), na.rm = TRUE)
min_meanvar <- min(
means |>
filter(mean > 0) |>
pull(mean) |>
log10(),
means |>
filter(var > 0) |>
pull(var) |>
log10()
)
p <- ggplot(
means |>
filter(mean > 0, var > 0),
aes(x = log10(mean), y = log10(var))
) +
geom_jitter() +
geom_smooth(method = lm, formula = y ~ x) +
expand_limits(x = c(min_meanvar, max_meanvar * 1.1),
y = c(min_meanvar, max_meanvar * 1.1)) +
geom_line(data = data.frame(mean = 10 ** c(min_meanvar, max_meanvar),
var = 10 ** c(min_meanvar, max_meanvar)),
linetype = "dashed") +
geom_point(data = data.frame(var_pop), color = "red") +
scale_x_continuous("Mean (log-scale)") +
scale_y_continuous("Variance (log-scale)") +
ggtitle(paste0("Number of ", type, " contacts"))
save_plot(here("figures", paste0(type, "_contacts_mean_var.pdf")), p)
save_plot(here("figures", paste0(type, "_contacts_mean_var.png")), p)
## contact with respect to background
bg_means <- dt_contacts |>
group_by(participant_id) |>
mutate(
mean = mean(.data[[type]]),
var = var(.data[[type]]),
type = .data[[type]]
) |>
ungroup()
max_y <- bg_means |>
pull(mean) |>
quantile(probs = 0.95, na.rm = TRUE)
type_settings <- colnames(dt_contacts) |>
grep(pattern = paste0("^", type, "."), value = TRUE) |>
grep(pattern = "^[^0-9]*$", value = TRUE)
## setting
formulae <- paste0("~mean(", type_settings, ")") |>
lapply(FUN = as.formula) |>
do.call(what = quos)
bg_type_means <- dt_contacts |>
group_by(participant_id) |>
mutate_at(c(type_settings), mean) |>
pivot_longer(
names_to = "setting", values_to = "mean", type_settings
) |>
mutate(setting = sub(".*\\.", "", setting)) |>
mutate(setting = ifelse(setting == "work", "work/school", setting))
max_type_y <- bg_type_means |>
pull(mean) |>
quantile(probs = 0.95, na.rm = TRUE)
## age
p <- ggplot(
bg_means |>
filter(!is.na(age)),
aes(x = agegroup, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Age group")
save_plot(here("figures", paste0(type, "_age.pdf")), p)
save_plot(here("figures", paste0(type, "_age.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(age)),
aes(x = agegroup, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Age group") +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_age_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_age_setting.png")), p)
## education
p <- ggplot(
bg_type_means |>
filter(!is.na(highest.education)),
aes(x = highest.education, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Highest education level",
labels = c("None", "GCSE", "A-levels", "BSc", "MSc", "Student")
)
save_plot(here("figures", paste0(type, "_education.pdf")), p)
save_plot(here("figures", paste0(type, "_education.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(highest.education)),
aes(x = highest.education, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Highest education level",
labels = c("None", "GCSE", "A-levels", "BSc", "MSc", "Student")
) +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_education_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_education_setting.png")), p)
## students
p <- ggplot(
bg_type_means |>
filter(!is.na(education.stillin)),
aes(x = education.stillin, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("In education", labels = c("No", "Yes"))
save_plot(here("figures", paste0(type, "_students.pdf")), p)
save_plot(here("figures", paste0(type, "_students.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(education.stillin)),
aes(x = education.stillin, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("In education", labels = c("No", "Yes")) +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_students_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_students_setting.png")), p)
## rural/urban? postcode?
p <- ggplot(
bg_means |>
filter(!is.na(urban.rural)),
aes(x = urban.rural, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Settlement type")
save_plot(here("figures", paste0(type, "_settlement.pdf")), p)
save_plot(here("figures", paste0(type, "_settlement.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(urban.rural)),
aes(x = urban.rural, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Settlement type") +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_settlement_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_settlement_setting.png")), p)
p <- ggplot(
bg_means |>
filter(!is.na(work.urban.rural)),
aes(x = work.urban.rural, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Work settlement type")
save_plot(here("figures", paste0(type, "_work_settlement.pdf")), p)
save_plot(here("figures", paste0(type, "_work_settlement.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(work.urban.rural)),
aes(x = work.urban.rural, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Work settlement type") +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_work_settlement_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_work_settlement_setting.png")), p)
## main activity
p <- ggplot(
bg_means |>
filter(!is.na(main.activity)),
aes(x = main.activity, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Main activity",
labels = c(
"Full time employed",
"Part-time employed",
"Self-employed",
"School",
"Home-maker",
"Unemployed",
"Long-term leave",
"Retired",
"Other"
)
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
save_plot(here("figures", paste0(type, "_main_activity.pdf")), p)
save_plot(here("figures", paste0(type, "_main_activity.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(main.activity)),
aes(x = main.activity, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Main activity",
labels = c(
"Full time employed",
"Part-time employed",
"Self-employed",
"School",
"Home-maker",
"Unemployed",
"Long-term leave",
"Retired",
"Other"
)
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_main_activity_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_main_activity_setting.png")), p)
## occupation
p <- ggplot(
bg_means |>
filter(!is.na(occupation)),
aes(x = occupation, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Occupation",
labels = c(
"Professional", "Office worker",
"Retail", "Skilled manual",
"Other manual", "Other"
)
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
save_plot(here("figures", paste0(type, "_occupation.pdf")), p)
save_plot(here("figures", paste0(type, "_occupation.png")), p)
## household size?
bg_means <- bg_means |>
mutate(
hh_group = factor(
ifelse(nb.household < 6, as.character(nb.household), "6+"),
levels = c(as.character(seq_len(6)-1), "6+")
)
)
## household size?
bg_type_means <- bg_type_means |>
mutate(
hh_group = factor(
ifelse(nb.household < 6, as.character(nb.household), "6+"),
levels = c(as.character(seq_len(6)-1), "6+")
)
)
p <- ggplot(bg_means, aes(x = hh_group, y = mean)) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Household size")
save_plot(here("figures", paste0(type, "_hh_size.pdf")), p)
save_plot(here("figures", paste0(type, "_hh_size.png")), p)
p <- ggplot(bg_type_means, aes(x = hh_group, y = mean, color = setting)) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Household size") +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_hh_size_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_hh_size_setting.png")), p)
bg_means <- bg_means |>
mutate(
hh_children_group = factor(
ifelse(
nb.household.children < 6,
as.character(nb.household.children),
"6+"
),
levels = c(as.character(seq_len(6) - 1), "6+")
)
)
bg_type_means <- bg_type_means |>
mutate(
hh_children_group = factor(
ifelse(
nb.household.children < 6,
as.character(nb.household.children), "6+"),
levels = c(as.character(seq_len(6) - 1), "6+")
)
)
p <- ggplot(bg_means, aes(x = hh_children_group, y = mean)) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Number of children in the household")
save_plot(here("figures", paste0(type, "_hh_size_children.pdf")), p)
save_plot(here("figures", paste0(type, "_hh_size_children.png")), p)
p <- ggplot(
bg_type_means,
aes(x = hh_children_group, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete("Number of children in the household") +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_hh_size_children_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_hh_size_children_setting.png")), p)
## regional differences
p <- ggplot(
bg_means |>
filter(!is.na(country)),
aes(x = country, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Country",
labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$country)))
)
save_plot(here("figures", paste0(type, "_country.pdf")), p)
save_plot(here("figures", paste0(type, "_country.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(country)),
aes(x = country, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Country",
labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$country)))
) +
scale_color_brewer("", palette = "Set1") +
theme(legend.position = "top")
save_plot(here("figures", paste0(type, "_country_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_country_setting.png")), p)
p <- ggplot(
bg_means |>
filter(!is.na(region)),
aes(x = region, y = mean)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Region",
labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$region)))
) +
theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1))
save_plot(here("figures", paste0(type, "_region.pdf")), p)
save_plot(here("figures", paste0(type, "_region.png")), p)
p <- ggplot(
bg_type_means |>
filter(!is.na(region)),
aes(x = region, y = mean, color = setting)
) +
geom_boxplot() +
coord_cartesian(ylim = c(0, max_y)) +
scale_y_continuous(paste0("Number of ", type, " contacts")) +
scale_x_discrete(
"Region",
labels = stri_trans_totitle(gsub("_", " ", levels(bg_means$region)))
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
legend.position = "top"
) +
scale_color_brewer("", palette = "Set1")
save_plot(here("figures", paste0(type, "_region_setting.pdf")), p)
save_plot(here("figures", paste0(type, "_region_setting.png")), p)
}
means <- dt_contacts |>
group_by(participant_id) |>
mutate(
mean.physical = mean(physical),
mean.conversational = mean(conversational)
) |>
ungroup() |>
pivot_longer(
names_to = "type", values_to = "contacts", starts_with("mean.")
) |>
mutate(type = sub("mean.", "", type))
p <- ggplot(means, aes(x = contacts, fill = type)) +
geom_histogram(binwidth = 1, position = "stack") +
scale_fill_brewer("", palette = "Set1") +
coord_cartesian(xlim = c(0, 25)) +
scale_y_continuous("Number of surveys") +
scale_x_continuous("Mean number of contacts") +
theme(legend.position = "bottom")
save_plot(here("figures", paste0("contact_dist.pdf")), p)
save_plot(here("figures", paste0("contact_dist.png")), p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment