Created
September 27, 2023 10:15
-
-
Save tmalsburg/18b3475107b4ebd7f4e23d55eb577265 to your computer and use it in GitHub Desktop.
Analysis of onset patters of basic Mandarin Chinese color terms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(dplyr) | |
options(scipen=999) | |
N <- 100000 # Number of simulations | |
# Data: | |
onsets <- tibble( | |
onset = c("b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", | |
"j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s"), | |
count = c(16, 16, 18, 9, 21, 19, 23, 24, 18, 18, 19, 13, 14, 14, 20, | |
18, 19, 14, 17, 16, 16), | |
prob = count / sum(count)) | |
# Assuming that color terms have random onsets | |
# (H0), how likely is it that warm colors (yellow, | |
# red) share the same onset and cold colors (blue, | |
# green) share the same onset? | |
sapply(1:N, function(i) { | |
with(onsets, | |
sample(onset, 4, replace=TRUE, prob=prob)) | |
}) |> | |
t() |> | |
data.frame() |> | |
dplyr::rename(yellow=X1, red=X2, blue=X3, green=X4) -> sim | |
# Count cases where “yellow” and “red”, and “blue” | |
# and “green” share the same onset: | |
sim |> | |
filter( | |
yellow == red, | |
blue == green, | |
yellow != green) |> | |
nrow() -> i | |
# p-value: | |
i/N -> p1 | |
# Assuming that “black” and “white” have random | |
# onsets (H0), how likely is it that they start | |
# with onsets that are extremely dissimilar with | |
# respect to place and mode of articulation? | |
# Assign random onsets to black and white and | |
# simulate N time: | |
sapply(1:N, function(i) { | |
with(onsets, | |
sample(onset, 2, replace=TRUE, prob=prob)) | |
}) |> | |
t() |> | |
data.frame() |> | |
dplyr::rename(black=X1, white=X2) -> sim | |
# Count cases with extremely dissimilar onsets: | |
sim |> | |
filter( | |
case_when( | |
black %in% c("p", "b") & white %in% c("h") ~ TRUE, | |
black %in% c("m") & white %in% c("g", "k", "h") ~ TRUE, | |
black %in% c("g", "k") & white %in% c("m", "f") ~ TRUE, | |
black %in% c("f") & white %in% c("g", "k") ~ TRUE, | |
white %in% c("p", "b") & black %in% c("h") ~ TRUE, | |
white %in% c("m") & black %in% c("g", "k", "h") ~ TRUE, | |
white %in% c("g", "k") & black %in% c("m", "f") ~ TRUE, | |
white %in% c("f") & black %in% c("g", "k") ~ TRUE, | |
.default = FALSE) | |
) |> | |
nrow() -> i | |
# p-value: | |
i/N -> p2 | |
p1 * p2 -> p3 | |
message("p1: ", p1) | |
message("p2: ", p2) | |
message("p3: ", p3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment