Skip to content

Instantly share code, notes, and snippets.

@hbredin
Last active March 18, 2016 13:49
Show Gist options
  • Save hbredin/4f51dbfc934db2d17e36 to your computer and use it in GitHub Desktop.
Save hbredin/4f51dbfc934db2d17e36 to your computer and use it in GitHub Desktop.
Modular code
"""This module defines functions that do the actual work"""
def time_based_propagation(speaker_diarization, pronounced_names, neighborhood=20.0):
    """Perform time-based propagation and return 'who speaks when' hypothesis as pyannote.core.Annotation

    Template function: the propagation itself is still to be written, so the
    returned hypothesis is currently an empty Annotation.

    Parameters
    ----------
    speaker_diarization
        Speaker diarization of the episode (presumably a
        pyannote.core.Annotation with anonymous labels -- confirm against caller).
    pronounced_names
        Pronounced names of the episode (presumably a pyannote.core.Annotation).
    neighborhood : float, optional
        Propagation neighborhood. Defaults to 20.0.

    Returns
    -------
    hypothesis : pyannote.core.Annotation
    """
    # Imported locally because this module has no module-level import of
    # Annotation; without it the call below raises NameError.
    from pyannote.core import Annotation

    hypothesis = Annotation()
    # do something with speaker_diarization, pronounced_names and neighborhood
    return hypothesis
def speech_turn_based_propagation(speaker_diarization, pronounced_names, neighborhood=1):
    """Perform turn-based propagation and return 'who speaks when' hypothesis as pyannote.core.Annotation

    Template function: the propagation itself is still to be written, so the
    returned hypothesis is currently an empty Annotation.

    Parameters
    ----------
    speaker_diarization
        Speaker diarization of the episode (presumably a
        pyannote.core.Annotation with anonymous labels -- confirm against caller).
    pronounced_names
        Pronounced names of the episode (presumably a pyannote.core.Annotation).
    neighborhood : int, optional
        Propagation neighborhood. Defaults to 1.

    Returns
    -------
    hypothesis : pyannote.core.Annotation
    """
    # Imported locally because this module has no module-level import of
    # Annotation; without it the call below raises NameError.
    from pyannote.core import Annotation

    hypothesis = Annotation()
    # do something with speaker_diarization, pronounced_names and neighborhood
    return hypothesis
"""This module defines a function to evaluate performance"""
from pyannote.metrics.identification import IdentificationErrorRate
def evaluate(reference, hypothesis):
    """Score a hypothesis (pyannote.core.Annotation) against its reference

    A fresh IdentificationErrorRate metric is instantiated on every call and
    applied to the (reference, hypothesis) pair; whatever the metric returns
    is handed back unchanged.
    """
    metric = IdentificationErrorRate()
    return metric(reference, hypothesis)
"""This module is an example of how you could run your experiments"""
# NOTE: the original import line ended with a dangling comma, which is a
# SyntaxError outside of parentheses.
from .approach import time_based_propagation, speech_turn_based_propagation
from .load import get_who_speaks_when_reference, get_pronounced_names_reference, get_speaker_diarization_reference
from .eval import evaluate

# range(1, 18) iterates over episodes 1 to 17.
for episode in range(1, 18):

    # Load the per-episode data once; both experiments below reuse it.
    reference = get_who_speaks_when_reference(episode)
    pronounced_names = get_pronounced_names_reference(episode)
    speaker_diarization = get_speaker_diarization_reference(episode)

    # Experiment 1: time-based propagation with several neighborhood values.
    for neighborhood in [10., 20., 30.]:
        hypothesis = time_based_propagation(speaker_diarization,
                                            pronounced_names,
                                            neighborhood=neighborhood)
        performance = evaluate(reference, hypothesis)
        # you could print the performance or save it to a file for later plotting
        # you could save hypothesis to file to avoid recomputing it later, using 'pyannote.core.json.dump'

    # Experiment 2: speech-turn-based propagation with several neighborhood values.
    for neighborhood in [1, 2, 3]:
        # Call the function under the name it is actually imported as
        # (the original called the undefined 'turn_based_propagation').
        hypothesis = speech_turn_based_propagation(speaker_diarization,
                                                   pronounced_names,
                                                   neighborhood=neighborhood)
        performance = evaluate(reference, hypothesis)
        # ...
"""This module defines functions to load data from files"""
from pyannote.core import Segment, Annotation
def getPathToGroundtruth(episode):
    """Return path to groundtruth file for episode

    Template function: the mapping from an episode to its groundtruth file
    depends on the local data layout and has to be filled in.

    Raises
    ------
    NotImplementedError
        Always, until the path computation below is implemented. (The
        original returned a variable that was never assigned, which failed
        with a less helpful NameError.)
    """
    # do something...
    # pathToGroundtruth = ...
    # return pathToGroundtruth
    raise NotImplementedError(
        'getPathToGroundtruth must be implemented for your data layout '
        '(episode=%r)' % (episode,))
def get_who_speaks_when_reference(episode):
    """Return 'who speaks when' reference as pyannote.core.Annotation

    Opens the groundtruth file of `episode` (located with
    getPathToGroundtruth) and iterates over its lines. The per-line parsing
    is a template still to be filled in, so the returned Annotation is
    currently empty.
    """
    pathToGroundtruth = getPathToGroundtruth(episode)
    who_speaks_when = Annotation()
    with open(pathToGroundtruth, 'r') as fp:
        for line in fp:
            # do something with file content
            # segment = Segment(..., ...)
            # speaker = ...
            # who_speaks_when[segment] = speaker
            pass  # placeholder: a loop body made only of comments is a SyntaxError
    return who_speaks_when
def get_pronounced_names_reference(episode):
    """Return 'pronounced names' reference as pyannote.core.Annotation

    Opens the groundtruth file of `episode` (located with
    getPathToGroundtruth) and iterates over its lines. The per-line parsing
    is a template still to be filled in, so the returned Annotation is
    currently empty.
    """
    pathToGroundtruth = getPathToGroundtruth(episode)
    pronounced_names = Annotation()
    with open(pathToGroundtruth, 'r') as fp:
        for line in fp:
            # do something with file content
            # segment = Segment(..., ...)
            # name = ...
            # pronounced_names[segment] = name
            pass  # placeholder: a loop body made only of comments is a SyntaxError
    return pronounced_names
def get_speaker_diarization_reference(episode):
    """Return 'speaker diarization' reference as pyannote.core.Annotation

    Obtained by loading the 'who speaks when' reference of the episode and
    calling anonymize_labels() on it.
    """
    return get_who_speaks_when_reference(episode).anonymize_labels()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment