Last active
August 13, 2021 11:54
-
-
Save gongcastro/173e8a378c4106bddc9bb3d18357beba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get audio duration | |
# set up ---- | |
library(audio) | |
library(purrr) | |
library(dplyr) | |
library(tidyr) | |
library(ggplot2) | |
library(PraatR) | |
library(stringr) | |
# create functions ---- | |
# load audios from repository | |
load_audios <- function( | |
path, # path to folder containing the audios | |
format = "wav" # audio format (must be "wav") | |
){ | |
if (!(format %in% "wav")) stop("format must be 'wav'") | |
file_paths <- list.files(path, full.names = TRUE) | |
file_names <- list.files(path, full.names = FALSE) | |
x <- map(file_paths, function(x) as.double(load.wave(x))) %>% set_names(file_names) | |
return(x) | |
} | |
# this will return a data frame of durations (one row per audio) | |
get_audio_duration <- function( | |
path, # path to folder containing the audios | |
sampling_rate = 44000, # recording sampling rate (in Hz) | |
units = "s" # time unit (s for seconds, ms for milliseconds) | |
){ | |
audios <- load_audios(path) | |
if (units %in% "ms") sampling_rate <- sampling_rate/1000 | |
x <- map_dbl(audios, length)/sampling_rate/2 | |
x <- data.frame(audio = names(x), duration = x) | |
row.names(x) <- NULL | |
return(x) | |
} | |
# this will return a data frame with one amplitude measure per time point for each audio | |
# multiple rows per audio, and two rows per time point (positive and negative amplitude) | |
get_audio_amplitude <- function( | |
path, # path to folder containing the audios | |
sampling_rate = 44000, # recording sampling rate (in Hz) | |
units = "s" # time unit (s for seconds, ms for milliseconds) | |
){ | |
audios <- load_audios(path) | |
x <- map(audios, function(x){ | |
data.frame(amplitude = x) %>% | |
mutate(time = 1:nrow(.)/sampling_rate/2) | |
}) | |
x <- bind_rows(x, .id = "audio") | |
x <- x[, c("audio", "time", "amplitude")] | |
return(x) | |
} | |
get_audio_pitch <- function( | |
path, # path to folder containing the audios | |
time_step = 0.001, # time step in seconds (0.0 = auto) | |
pitch_limits = c(95, 800) # pitch floor and ceiling in Hz | |
){ | |
file_paths <- list.files(path, full.names = TRUE) | |
file_names <- list.files(path, full.names = FALSE) | |
pitch_args <- list(time_step, pitch_limits[1], pitch_limits[2]) | |
pitch_paths <- paste0(tempdir(), "\\", file_names) | |
pitch_tier_paths <- paste0(tempdir(), "\\", file_names) | |
map2( | |
.x = file_paths, .y = pitch_paths, | |
~praat( | |
"To Pitch...", list(time_step, pitch_limits[1], pitch_limits[2]), | |
input = .x, output = .y, overwrite = TRUE | |
) | |
) | |
map2( | |
.x = pitch_paths, .y = pitch_tier_paths, | |
~praat( | |
"Down to PitchTier", | |
input = .x, output = .y, overwrite = TRUE, filetype = "headerless spreadsheet" | |
) | |
) | |
x <- map( | |
pitch_tier_paths, | |
~read.table(., col.names = c("time", "f0"))) %>% # read the .Pitch object from the pitch.tier.path | |
set_names(file_names) %>% | |
bind_rows(.id = "audio") | |
return(x) | |
} | |
# get pitch local maxima for each audio | |
get_pitch_maxima <- function( | |
pitch # output of get_audio_pitch | |
){ | |
x <- group_split(pitch, audio) | |
x <- x %>% | |
map(., "f0") %>% | |
map(., ~which(c(NA, ., NA) > c(., NA, NA) & c(NA, ., NA) > c(NA, NA, .))) %>% # get indices of local maxima | |
map(`-`, 1) %>% | |
map2(x, ., ~slice(.x, .y)) %>% | |
map_df(., bind_rows) # merge datasets of all audios into one | |
return(x) | |
} | |
# get formants | |
get_audio_formants <- function( | |
path, | |
max_formants = 2, # maximum number of formats to extract, e.g. if 2, F1 and F2 are extracted | |
time_step = 0.001, # time step (s) | |
max_frequency = 10000, # maximum formant (Hz) | |
time_window_length = 0.025, # window length (s) | |
pre_emphasis = 50, # pre-emphasis from (Hz) | |
long_format = FALSE # should table be on long format? (values from F1 and F2 in the same column) | |
){ | |
file_paths <- list.files(path, full.names = TRUE) | |
file_names <- list.files(path, full.names = FALSE) | |
formant_args <- list(time_step, max_formants, max_frequency, time_window_length, pre_emphasis) | |
formant_paths <- paste0(tempdir(), "\\", file_names) | |
formant_tab_paths <- paste0(tempdir(), "\\", file_names) | |
# retrieve formants from .wav files | |
map2( | |
.x = file_paths, .y = formant_paths, | |
~praat( | |
"To Formant (burg)...", | |
arguments = formant_args, # take the arguments specified above as a list | |
input = .x, # paths to the audio files | |
output = .y, # paths for the resulting paths of the .Pitch files | |
overwrite = TRUE # overwrite the files in the outcome folder if the function is run again | |
) | |
) | |
formant_tab_args <- list( | |
FALSE, # include frame number | |
TRUE, # include time | |
3, # time decimals | |
FALSE, # include intensity | |
3, # intensity decimals | |
FALSE, # include number of formants | |
3, # frequency decimals | |
FALSE # include bandwidths | |
) | |
map2( | |
.x = formant_paths, .y = formant_tab_paths, | |
~praat( | |
"Down to Table...", | |
arguments = formant_tab_args, # take the arguments specified above as a list | |
input = .x, # paths to the audio files | |
output = .y, # paths for the resulting paths of the .Pitch files | |
filetype = "tab-separated", # this format is easier to read | |
overwrite = TRUE | |
) | |
) | |
x <- map( | |
formant_tab_paths, | |
function(x){ | |
read.table(x, header = TRUE, sep = "\t", na.strings = "--undefined--") %>% | |
rename_all(~str_remove_all(., "\\.s\\.|\\.Hz\\.") %>% tolower()) | |
} | |
) %>% | |
set_names(file_names) %>% | |
bind_rows(.id = "audio") | |
if (long_format) x <- pivot_longer(x, starts_with("f"), names_to = "formant", values_to = "value") | |
return(x) | |
} | |
# some examples ---- | |
# load audios | |
path <- "C:/Users/gonza/Documents/cognate-priming/Stimuli/Sounds/sounds_cat" | |
# extract and plot durations | |
durations <- get_audio_duration(path, sampling_rate = 48000, units = "s") | |
ggplot(durations, aes(x = duration, y = reorder(audio, duration))) + | |
geom_point() + | |
labs(x = "Duration (s)", y = "Audio") + | |
theme_minimal() | |
# extract and plot raw amplitudes | |
amplitudes <- get_audio_amplitude(path, sampling_rate = 48000, units = "s") | |
# subsetting only two audios (plotting all audios may take a while | |
ggplot(amplitudes[amplitudes$audio %in% c("abella-i.wav", "porta.wav"),], aes(x = time, y = amplitude)) + | |
facet_wrap(~audio, ncol = 1) + | |
geom_line() + | |
labs(x = "Time (s)", y = "Amplitude (dB)") + | |
theme_minimal() | |
# extract and plot mean amplitudes | |
mean_amplitudes <- amplitudes %>% | |
group_by(audio) %>% | |
summarise(mean_amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop") | |
ggplot(mean_amplitudes, aes(x = mean_amplitude, y = reorder(audio, mean_amplitude))) + | |
geom_point() + | |
labs(x = "Duration (s)", y = "Audio") + | |
theme_minimal() | |
# extract and plot pitch | |
pitch <- get_audio_pitch(path) | |
ggplot(pitch, aes(x = time, y = f0)) + | |
geom_line(aes(colour = audio), alpha = 0.5) + | |
geom_smooth(colour = "black") + | |
labs(x = "Time", y = "F0 (Hz)") + | |
theme_minimal() + | |
theme(legend.position = "none") | |
# extract and plot pitch for one audio (with local maxima) | |
pitch_maxima <- get_pitch_maxima(pitch) | |
ggplot(pitch[pitch$audio=="abella-i.wav",], aes(x = time, y = f0)) + | |
geom_line(size = 1) + | |
geom_point(data = pitch_maxima[pitch_maxima$audio=="abella-i.wav",], size = 5, colour = "red") + | |
labs(x = "Time", y = "F0 (Hz)") + | |
theme_minimal() + | |
theme(legend.position = "none") | |
# extract and plot formants | |
formants <- get_audio_formants(path = path, max_formants = 2, long_format = TRUE) | |
ggplot(formants[formants$audio=="abella-i.wav",], aes(x = time, y = value, colour = formant, group = formant)) + | |
geom_point(size = 1, alpha = 0.5) + | |
geom_smooth() + | |
labs(x = "Time (s)", y = "Frequency (Hz)", colour = "Formant") + | |
theme_minimal() | |
# extract and plot mean formants | |
mean_formants <- formants %>% | |
group_by(audio, formant) %>% | |
summarise( | |
mean_value = mean(value, na.rm = TRUE), | |
sd_value = sd(value, na.rm = TRUE), | |
.groups = "drop" | |
) | |
ggplot(mean_formants, aes(x = mean_value, y = reorder(audio, mean_value), colour = formant)) + | |
geom_errorbar(aes(xmin = mean_value-sd_value, xmax = mean_value+sd_value)) + | |
geom_point() + | |
labs(x = "Frequency (Hz)", y = "Audio", colour = "Formant") + | |
theme_minimal() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment