Skip to content

Instantly share code, notes, and snippets.

@MHenderson
Last active August 3, 2017 13:17
Show Gist options
  • Save MHenderson/59f5ebb84ef9ad48e493f333f6850353 to your computer and use it in GitHub Desktop.
Save MHenderson/59f5ebb84ef9ad48e493f333f6850353 to your computer and use it in GitHub Desktop.
Co-concordance plotting in ggplot2
library(CorporaCoCo)
library(dhlawrencer)
library(thomashardyr)
library(tidyverse)
library(tidytext)
library(stringi)
# Node words (body-part nouns) whose co-occurrences are compared.
nodes <- c(
  "back", "eye", "eyes", "forehead",
  "hand", "hands", "head", "shoulder"
)

# Book tables for the two authors; the `text` column is tokenised below and
# the `book` column is used later for the plot caption.
hb <- hardy_books()
lb <- lawrence_books()

# Lower-case the text, extract the words of every element, and flatten the
# per-element results into one character vector per author.
hardy_words <- hb$text %>%
  stri_trans_tolower() %>%
  stri_extract_all_words() %>%
  unlist()
lawrence_words <- lb$text %>%
  stri_trans_tolower() %>%
  stri_extract_all_words() %>%
  unlist()

# Co-occurrence comparison of the two word vectors, restricted to `nodes`.
results <- surface_coco(
  hardy_words,
  lawrence_words,
  span = "5LR",
  nodes = nodes,
  fdr = 0.01
)
# Make hrbrthemes' ipsum theme the default for plots created from here on.
ggplot2::theme_set(hrbrthemes::theme_ipsum())

# Key every row by "x y"; the plot maps the same pasted string to its axis.
results$z <- paste(results$x, results$y)

# Row keys sorted by x (descending), then by effect size (descending).
ordering <- results %>%
  arrange(desc(x), desc(effect_size)) %>%
  pull(z)

# Turn a two-element c(first, second) vector into one fixed-width label:
# first word right-justified, second word left-justified, 10 characters each.
f <- function(v) {
  first_padded <- format(v[1], justify = "right", width = 10)
  second_padded <- format(v[2], justify = "left", width = 10)
  paste(first_padded, second_padded)
}

# Split each key back into its two words and build the padded axis labels.
V <- strsplit(ordering, split = " ")
labels <- map_chr(V, f)
# Plot the effect size of every "x y" pair as a point with its confidence
# interval, one pair per row (the axes are flipped by coord_flip()).
results %>%
  ggplot(aes(paste(x, y), effect_size)) +
  geom_point(colour = "skyblue4") +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), colour = "seashell4") +
  # Impose the precomputed row order and show the padded labels instead of
  # the raw "x y" keys; both are reversed so they stay aligned.
  scale_x_discrete(limits = rev(ordering), labels = rev(labels)) +
  coord_flip() +
  labs(
    title = "Co-concordances in Thomas Hardy and D. H. Lawrence",
    subtitle = "Comparing the co-occurrences of a set of body part nouns",
    # List every title exactly once. Pasting the two title vectors against
    # each other pairs them element-wise and recycles the shorter one, which
    # repeats titles when the authors have different numbers of books.
    # as.character() keeps this correct if `book` is a factor (c() on factors
    # yields integer codes before R 4.1).
    caption = paste(
      c(as.character(unique(hb$book)), as.character(unique(lb$book))),
      collapse = ", "
    ),
    x = "",
    y = "Effect size"
  ) +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Monospace keeps the fixed-width padded labels aligned.
    axis.text.y = element_text(family = "monospace")
  ) +
  # Dashed reference line at an effect size of zero.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # line geoms; `size` is kept for compatibility with older installs.
  geom_hline(yintercept = 0, colour = "grey", linetype = 2, size = 0.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment