gongcastro · August 13, 2021 11:54
diff --git a/processing_audios.R b/processing_audios.R
 # get audio duration

 # set up ----
 library(audio)
 library(purrr)
 library(dplyr)
 library(tidyr)
 library(ggplot2)
 library(PraatR)
 library(stringr)

 # create functions ----
 # load audios from repository
 load_audios <- function(
  path, # path to folder containing the audios
  format = "wav" # audio format (must be "wav")
 ){

  if (!(format %in% "wav")) stop("format must be 'wav'")
  file_paths <- list.files(path, full.names = TRUE)
  file_names <- list.files(path, full.names = FALSE)
  x <- map(file_paths, function(x) as.double(load.wave(x))) %>% set_names(file_names)
  return(x)
 }

 # this will return a data frame of durations (one row per audio)
 get_audio_duration <- function(
  path, # path to folder containing the audios
  sampling_rate = 44000, # recording sampling rate (in Hz)
  units = "s" # time unit (s for seconds, ms for milliseconds)
 ){
  audios <- load_audios(path)
  if (units %in% "ms") sampling_rate <- sampling_rate/1000
  x <- map_dbl(audios, length)/sampling_rate/2
  x <- data.frame(audio = names(x), duration = x)
  row.names(x) <- NULL
  return(x)
 }

 # this will return a data frame with one amplitude measure per time point for each audio
 # multiple rows per audio, and two rows per time point (positive and negative amplitude)
 get_audio_amplitude <- function(
  path, # path to folder containing the audios
  sampling_rate = 44000, # recording sampling rate (in Hz)
  units = "s" # time unit (s for seconds, ms for milliseconds)
 ){
  audios <- load_audios(path)
  x <- map(audios, function(x){
    data.frame(amplitude = x) %>%
      mutate(time = 1:nrow(.)/sampling_rate/2)
  })
  x <- bind_rows(x, .id = "audio")
  x <- x[, c("audio", "time", "amplitude")]
  return(x)
 }

 get_audio_pitch <- function(
  path, # path to folder containing the audios
  time_step = 0.001, # time step in seconds (0.0 = auto)
  pitch_limits = c(95, 800) # pitch floor and ceiling in Hz
 ){

  file_paths <- list.files(path, full.names = TRUE)
  file_names <- list.files(path, full.names = FALSE)
  pitch_args <- list(time_step, pitch_limits[1], pitch_limits[2])
  pitch_paths <- paste0(tempdir(), "\\", file_names)
  pitch_tier_paths <- paste0(tempdir(), "\\", file_names)

  map2(
    .x = file_paths, .y = pitch_paths,
    ~praat(
      "To Pitch...", list(time_step, pitch_limits[1], pitch_limits[2]),
      input = .x, output = .y, overwrite = TRUE
    )
  )
  map2(
    .x = pitch_paths, .y = pitch_tier_paths,
    ~praat(
      "Down to PitchTier",
      input = .x, output = .y, overwrite = TRUE, filetype = "headerless spreadsheet"
    )
  )
  x <- map(
    pitch_tier_paths,
    ~read.table(., col.names = c("time", "f0"))) %>% # read the .Pitch object from the pitch.tier.path
    set_names(file_names) %>%
    bind_rows(.id = "audio")

  return(x)
 }

 # get pitch local maxima for each audio
 get_pitch_maxima <- function(
  pitch # output of get_audio_pitch
 ){
  x <- group_split(pitch, audio)
  x <- x %>%
    map(., "f0") %>%
    map(., ~which(c(NA, ., NA) > c(., NA, NA) & c(NA, ., NA) > c(NA, NA, .))) %>% # get indices of local maxima
    map(`-`, 1) %>%
    map2(x, ., ~slice(.x, .y)) %>%
    map_df(., bind_rows) # merge datasets of all audios into one

  return(x)
 }


 # get formants
 get_audio_formants <- function(
  path,
  max_formants = 2, # maximum number of formats to extract, e.g. if 2, F1 and F2 are extracted
  time_step = 0.001, # time step (s)
  max_frequency = 10000,  # maximum formant (Hz)
  time_window_length = 0.025, # window length (s)
  pre_emphasis = 50, # pre-emphasis from (Hz)
  long_format = FALSE # should table be on long format? (values from F1 and F2 in the same column)
 ){
  file_paths <- list.files(path, full.names = TRUE)
  file_names <- list.files(path, full.names = FALSE)
  formant_args <- list(time_step, max_formants, max_frequency, time_window_length, pre_emphasis)
  formant_paths <- paste0(tempdir(), "\\", file_names)
  formant_tab_paths <- paste0(tempdir(), "\\", file_names)

  # retrieve formants from .wav files
  map2(
    .x = file_paths, .y = formant_paths,
    ~praat(
      "To Formant (burg)...",
      arguments = formant_args, # take the arguments specified above as a list
      input = .x, # paths to the audio files
      output = .y, # paths for the resulting paths of the .Pitch files
      overwrite = TRUE # overwrite the files in the outcome folder if the function is run again
    )
  )

  formant_tab_args <- list(
    FALSE, # include frame number
    TRUE, # include time
    3,    # time decimals
    FALSE, # include intensity
    3,    # intensity decimals
    FALSE, # include number of formants
    3,    # frequency decimals
    FALSE  # include bandwidths
  )


  map2(
    .x = formant_paths, .y = formant_tab_paths,
    ~praat(
      "Down to Table...",
      arguments = formant_tab_args, # take the arguments specified above as a list
      input = .x, # paths to the audio files
      output = .y, # paths for the resulting paths of the .Pitch files
      filetype = "tab-separated", # this format is easier to read
      overwrite = TRUE
    )
  )

  x <- map(
    formant_tab_paths,
    function(x){
      read.table(x, header = TRUE, sep = "\t", na.strings = "--undefined--") %>%
        rename_all(~str_remove_all(., "\\.s\\.|\\.Hz\\.") %>% tolower())
    }
  ) %>%
    set_names(file_names) %>%
    bind_rows(.id = "audio")

  if (long_format) x <- pivot_longer(x, starts_with("f"), names_to = "formant", values_to = "value")
  return(x)
 }




 # some examples ----

 # load audios
 path <- "C:/Users/gonza/Documents/cognate-priming/Stimuli/Sounds/sounds_cat"

 # extract and plot durations
 durations <- get_audio_duration(path, sampling_rate = 48000, units = "s")
 ggplot(durations, aes(x = duration, y = reorder(audio, duration))) +
  geom_point() +
  labs(x = "Duration (s)", y = "Audio") +
  theme_minimal()

 # extract and plot raw amplitudes
 amplitudes <- get_audio_amplitude(path, sampling_rate = 48000, units = "s")
 # subsetting only two audios (plotting all audios may take a while
 ggplot(amplitudes[amplitudes$audio %in% c("abella-i.wav", "porta.wav"),], aes(x = time, y = amplitude)) +
  facet_wrap(~audio, ncol = 1) +
  geom_line() +
  labs(x = "Time (s)", y = "Amplitude (dB)") +
  theme_minimal()

 # extract and plot mean amplitudes
 mean_amplitudes <- amplitudes %>%
  group_by(audio) %>%
  summarise(mean_amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop")

 ggplot(mean_amplitudes, aes(x = mean_amplitude, y = reorder(audio, mean_amplitude))) +
  geom_point() +
  labs(x = "Duration (s)", y = "Audio") +
  theme_minimal()

 # extract and plot pitch
 pitch <- get_audio_pitch(path)
 ggplot(pitch, aes(x = time, y = f0)) +
  geom_line(aes(colour = audio), alpha = 0.5) +
  geom_smooth(colour = "black") +
  labs(x = "Time", y = "F0 (Hz)") +
  theme_minimal() +
  theme(legend.position = "none")

 # extract and plot pitch for one audio (with local maxima)
 pitch_maxima <- get_pitch_maxima(pitch)
 ggplot(pitch[pitch$audio=="abella-i.wav",], aes(x = time, y = f0)) +
  geom_line(size = 1) +
  geom_point(data = pitch_maxima[pitch_maxima$audio=="abella-i.wav",], size = 5, colour = "red") +
  labs(x = "Time", y = "F0 (Hz)") +
  theme_minimal() +
  theme(legend.position = "none")

 # extract and plot formants
 formants <- get_audio_formants(path = path, max_formants = 2, long_format = TRUE)
 ggplot(formants[formants$audio=="abella-i.wav",], aes(x = time, y = value, colour = formant, group = formant)) +
  geom_point(size = 1, alpha = 0.5) +
  geom_smooth() +
  labs(x = "Time (s)", y = "Frequency (Hz)", colour = "Formant") +
  theme_minimal()

 # extract and plot mean formants
 mean_formants <- formants %>%
  group_by(audio, formant) %>%
  summarise(
    mean_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE),
    .groups = "drop"
  )

 ggplot(mean_formants, aes(x = mean_value, y = reorder(audio, mean_value), colour = formant)) +
  geom_errorbar(aes(xmin = mean_value-sd_value, xmax = mean_value+sd_value)) +
  geom_point() +
  labs(x = "Frequency (Hz)", y = "Audio", colour = "Formant") +
  theme_minimal()
	# get audio duration

	# set up ----
	library(audio)
	library(purrr)
	library(dplyr)
	library(tidyr)
	library(ggplot2)
	library(PraatR)
	library(stringr)

	# create functions ----
	# load audios from repository
	load_audios <- function(
	path, # path to folder containing the audios
	format = "wav" # audio format (must be "wav")
	){

	if (!(format %in% "wav")) stop("format must be 'wav'")
	file_paths <- list.files(path, full.names = TRUE)
	file_names <- list.files(path, full.names = FALSE)
	x <- map(file_paths, function(x) as.double(load.wave(x))) %>% set_names(file_names)
	return(x)
	}

	# this will return a data frame of durations (one row per audio)
	get_audio_duration <- function(
	path, # path to folder containing the audios
	sampling_rate = 44000, # recording sampling rate (in Hz)
	units = "s" # time unit (s for seconds, ms for milliseconds)
	){
	audios <- load_audios(path)
	if (units %in% "ms") sampling_rate <- sampling_rate/1000
	x <- map_dbl(audios, length)/sampling_rate/2
	x <- data.frame(audio = names(x), duration = x)
	row.names(x) <- NULL
	return(x)
	}

	# this will return a data frame with one amplitude measure per time point for each audio
	# multiple rows per audio, and two rows per time point (positive and negative amplitude)
	get_audio_amplitude <- function(
	path, # path to folder containing the audios
	sampling_rate = 44000, # recording sampling rate (in Hz)
	units = "s" # time unit (s for seconds, ms for milliseconds)
	){
	audios <- load_audios(path)
	x <- map(audios, function(x){
	data.frame(amplitude = x) %>%
	mutate(time = 1:nrow(.)/sampling_rate/2)
	})
	x <- bind_rows(x, .id = "audio")
	x <- x[, c("audio", "time", "amplitude")]
	return(x)
	}

	get_audio_pitch <- function(
	path, # path to folder containing the audios
	time_step = 0.001, # time step in seconds (0.0 = auto)
	pitch_limits = c(95, 800) # pitch floor and ceiling in Hz
	){

	file_paths <- list.files(path, full.names = TRUE)
	file_names <- list.files(path, full.names = FALSE)
	pitch_args <- list(time_step, pitch_limits[1], pitch_limits[2])
	pitch_paths <- paste0(tempdir(), "\\", file_names)
	pitch_tier_paths <- paste0(tempdir(), "\\", file_names)

	map2(
	.x = file_paths, .y = pitch_paths,
	~praat(
	"To Pitch...", list(time_step, pitch_limits[1], pitch_limits[2]),
	input = .x, output = .y, overwrite = TRUE
	)
	)
	map2(
	.x = pitch_paths, .y = pitch_tier_paths,
	~praat(
	"Down to PitchTier",
	input = .x, output = .y, overwrite = TRUE, filetype = "headerless spreadsheet"
	)
	)
	x <- map(
	pitch_tier_paths,
	~read.table(., col.names = c("time", "f0"))) %>% # read the .Pitch object from the pitch.tier.path
	set_names(file_names) %>%
	bind_rows(.id = "audio")

	return(x)
	}

	# get pitch local maxima for each audio
	get_pitch_maxima <- function(
	pitch # output of get_audio_pitch
	){
	x <- group_split(pitch, audio)
	x <- x %>%
	map(., "f0") %>%
	map(., ~which(c(NA, ., NA) > c(., NA, NA) & c(NA, ., NA) > c(NA, NA, .))) %>% # get indices of local maxima
	map(`-`, 1) %>%
	map2(x, ., ~slice(.x, .y)) %>%
	map_df(., bind_rows) # merge datasets of all audios into one

	return(x)
	}


	# get formants
	get_audio_formants <- function(
	path,
	max_formants = 2, # maximum number of formats to extract, e.g. if 2, F1 and F2 are extracted
	time_step = 0.001, # time step (s)
	max_frequency = 10000, # maximum formant (Hz)
	time_window_length = 0.025, # window length (s)
	pre_emphasis = 50, # pre-emphasis from (Hz)
	long_format = FALSE # should table be on long format? (values from F1 and F2 in the same column)
	){
	file_paths <- list.files(path, full.names = TRUE)
	file_names <- list.files(path, full.names = FALSE)
	formant_args <- list(time_step, max_formants, max_frequency, time_window_length, pre_emphasis)
	formant_paths <- paste0(tempdir(), "\\", file_names)
	formant_tab_paths <- paste0(tempdir(), "\\", file_names)

	# retrieve formants from .wav files
	map2(
	.x = file_paths, .y = formant_paths,
	~praat(
	"To Formant (burg)...",
	arguments = formant_args, # take the arguments specified above as a list
	input = .x, # paths to the audio files
	output = .y, # paths for the resulting paths of the .Pitch files
	overwrite = TRUE # overwrite the files in the outcome folder if the function is run again
	)
	)

	formant_tab_args <- list(
	FALSE, # include frame number
	TRUE, # include time
	3, # time decimals
	FALSE, # include intensity
	3, # intensity decimals
	FALSE, # include number of formants
	3, # frequency decimals
	FALSE # include bandwidths
	)


	map2(
	.x = formant_paths, .y = formant_tab_paths,
	~praat(
	"Down to Table...",
	arguments = formant_tab_args, # take the arguments specified above as a list
	input = .x, # paths to the audio files
	output = .y, # paths for the resulting paths of the .Pitch files
	filetype = "tab-separated", # this format is easier to read
	overwrite = TRUE
	)
	)

	x <- map(
	formant_tab_paths,
	function(x){
	read.table(x, header = TRUE, sep = "\t", na.strings = "--undefined--") %>%
	rename_all(~str_remove_all(., "\\.s\\.\|\\.Hz\\.") %>% tolower())
	}
	) %>%
	set_names(file_names) %>%
	bind_rows(.id = "audio")

	if (long_format) x <- pivot_longer(x, starts_with("f"), names_to = "formant", values_to = "value")
	return(x)
	}




	# some examples ----

	# load audios
	path <- "C:/Users/gonza/Documents/cognate-priming/Stimuli/Sounds/sounds_cat"

	# extract and plot durations
	durations <- get_audio_duration(path, sampling_rate = 48000, units = "s")
	ggplot(durations, aes(x = duration, y = reorder(audio, duration))) +
	geom_point() +
	labs(x = "Duration (s)", y = "Audio") +
	theme_minimal()

	# extract and plot raw amplitudes
	amplitudes <- get_audio_amplitude(path, sampling_rate = 48000, units = "s")
	# subsetting only two audios (plotting all audios may take a while
	ggplot(amplitudes[amplitudes$audio %in% c("abella-i.wav", "porta.wav"),], aes(x = time, y = amplitude)) +
	facet_wrap(~audio, ncol = 1) +
	geom_line() +
	labs(x = "Time (s)", y = "Amplitude (dB)") +
	theme_minimal()

	# extract and plot mean amplitudes
	mean_amplitudes <- amplitudes %>%
	group_by(audio) %>%
	summarise(mean_amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop")

	ggplot(mean_amplitudes, aes(x = mean_amplitude, y = reorder(audio, mean_amplitude))) +
	geom_point() +
	labs(x = "Duration (s)", y = "Audio") +
	theme_minimal()

	# extract and plot pitch
	pitch <- get_audio_pitch(path)
	ggplot(pitch, aes(x = time, y = f0)) +
	geom_line(aes(colour = audio), alpha = 0.5) +
	geom_smooth(colour = "black") +
	labs(x = "Time", y = "F0 (Hz)") +
	theme_minimal() +
	theme(legend.position = "none")

	# extract and plot pitch for one audio (with local maxima)
	pitch_maxima <- get_pitch_maxima(pitch)
	ggplot(pitch[pitch$audio=="abella-i.wav",], aes(x = time, y = f0)) +
	geom_line(size = 1) +
	geom_point(data = pitch_maxima[pitch_maxima$audio=="abella-i.wav",], size = 5, colour = "red") +
	labs(x = "Time", y = "F0 (Hz)") +
	theme_minimal() +
	theme(legend.position = "none")

	# extract and plot formants
	formants <- get_audio_formants(path = path, max_formants = 2, long_format = TRUE)
	ggplot(formants[formants$audio=="abella-i.wav",], aes(x = time, y = value, colour = formant, group = formant)) +
	geom_point(size = 1, alpha = 0.5) +
	geom_smooth() +
	labs(x = "Time (s)", y = "Frequency (Hz)", colour = "Formant") +
	theme_minimal()

	# extract and plot mean formants
	mean_formants <- formants %>%
	group_by(audio, formant) %>%
	summarise(
	mean_value = mean(value, na.rm = TRUE),
	sd_value = sd(value, na.rm = TRUE),
	.groups = "drop"
	)

	ggplot(mean_formants, aes(x = mean_value, y = reorder(audio, mean_value), colour = formant)) +
	geom_errorbar(aes(xmin = mean_value-sd_value, xmax = mean_value+sd_value)) +
	geom_point() +
	labs(x = "Frequency (Hz)", y = "Audio", colour = "Formant") +
	theme_minimal()
No results found