hbredin · March 18, 2016 13:49
diff --git a/approach.py b/approach.py
 """This module defines functions that do the actual work"""

 def time_based_propagation(speaker_diarization, pronounced_names, neighborhood=20.0):
    """Perform time-based propagation and return a 'who speaks when' hypothesis.

    Template function: the propagation logic is still to be written, so the
    returned hypothesis is currently an empty annotation.

    Parameters
    ----------
    speaker_diarization :
        Speaker diarization to propagate names onto.
    pronounced_names :
        Pronounced names to propagate.
    neighborhood : float, optional
        Size of the temporal neighborhood. Defaults to 20.0.
        NOTE(review): presumably expressed in seconds -- confirm.

    Returns
    -------
    hypothesis : pyannote.core.Annotation
        'Who speaks when' hypothesis.
    """
    # Imported here because this module has no top-level import of Annotation;
    # without it the constructor call below raises NameError.
    from pyannote.core import Annotation

    hypothesis = Annotation()
    # do something with speaker_diarization, pronounced_names and neighborhood
    return hypothesis


 def speech_turn_based_propagation(speaker_diarization, pronounced_names, neighborhood=1):
    """Perform turn-based propagation and return a 'who speaks when' hypothesis.

    Template function: the propagation logic is still to be written, so the
    returned hypothesis is currently an empty annotation.

    Parameters
    ----------
    speaker_diarization :
        Speaker diarization to propagate names onto.
    pronounced_names :
        Pronounced names to propagate.
    neighborhood : int, optional
        Size of the neighborhood. Defaults to 1.
        NOTE(review): presumably a number of speech turns -- confirm.

    Returns
    -------
    hypothesis : pyannote.core.Annotation
        'Who speaks when' hypothesis.
    """
    # Imported here because this module has no top-level import of Annotation;
    # without it the constructor call below raises NameError.
    from pyannote.core import Annotation

    hypothesis = Annotation()
    # do something with speaker_diarization, pronounced_names and neighborhood
    return hypothesis
  
diff --git a/eval.py b/eval.py
 """This module defines function to evaluate performance"""

 from pyannote.metrics.identification import IdentificationErrorRate


 def evaluate(reference, hypothesis):
    """Score a hypothesis against a reference with the identification error rate.

    Parameters
    ----------
    reference :
        Reference annotation, passed as first argument to the metric.
    hypothesis :
        Hypothesis, provided as pyannote.core.Annotation.

    Returns
    -------
    The value returned by calling a freshly created IdentificationErrorRate
    metric on (reference, hypothesis).
    """
    # A new metric instance is created on every call, so nothing is
    # accumulated from one evaluation to the next.
    metric = IdentificationErrorRate()
    error_rate = metric(reference, hypothesis)
    return error_rate
diff --git a/experiment.py b/experiment.py
 """This module is an example of how you could run your experiments"""

 from .approach import time_based_propagation, speech_turn_based_propagation
 from .load import (get_who_speaks_when_reference,
                    get_pronounced_names_reference,
                    get_speaker_diarization_reference)
 from .eval import evaluate

 # range(1, 18) visits episodes 1 to 17 (upper bound is exclusive)
 for episode in range(1, 18):

    # everything that depends only on the episode is loaded once,
    # before the parameter sweeps below
    reference = get_who_speaks_when_reference(episode)
    pronounced_names = get_pronounced_names_reference(episode)
    speaker_diarization = get_speaker_diarization_reference(episode)

    # sweep the neighborhood of the time-based approach
    for neighborhood in [10., 20., 30.]:
        hypothesis = time_based_propagation(speaker_diarization,
                                            pronounced_names,
                                            neighborhood=neighborhood)
        performance = evaluate(reference, hypothesis)
        # you could print the performance or save it to a file for later plotting
        # you could save hypothesis to file to avoid recomputing it later, using 'pyannote.core.json.dump'

    # sweep the neighborhood of the turn-based approach; the function must be
    # called under the name it is imported with (speech_turn_based_propagation)
    for neighborhood in [1, 2, 3]:
        hypothesis = speech_turn_based_propagation(speaker_diarization,
                                                   pronounced_names,
                                                   neighborhood=neighborhood)
        performance = evaluate(reference, hypothesis)
        # ...
    
    
  
diff --git a/load.py b/load.py
 """This module defines function to load data from files"""

 from pyannote.core import Segment, Annotation


 def getPathToGroundtruth(episode):
    """Return path to groundtruth file for episode.

    Template function: the path construction is still to be written. Until
    it is, the function fails with an explicit error instead of a NameError
    on an unassigned variable.

    Parameters
    ----------
    episode :
        Episode identifier (the experiment script passes integers).

    Returns
    -------
    pathToGroundtruth :
        Path to the groundtruth file, once the template has been filled in.

    Raises
    ------
    NotImplementedError
        As long as no path has been assigned below.
    """
    # do something...
    # pathToGroundtruth = ...
    pathToGroundtruth = None  # placeholder until the line above is filled in
    if pathToGroundtruth is None:
        raise NotImplementedError(
            "getPathToGroundtruth is a template: build the path to the "
            "groundtruth file of episode %r before calling it" % (episode,))
    return pathToGroundtruth


 def get_who_speaks_when_reference(episode):
    """Return 'who speaks when' reference as pyannote.core.Annotation.

    Opens the groundtruth file returned by getPathToGroundtruth(episode) and
    iterates over its lines. Template function: the per-line parsing is still
    to be written, so the returned annotation is currently empty.

    Parameters
    ----------
    episode :
        Episode identifier, forwarded to getPathToGroundtruth.

    Returns
    -------
    who_speaks_when : pyannote.core.Annotation
        'Who speaks when' reference.
    """

    pathToGroundtruth = getPathToGroundtruth(episode)
    with open(pathToGroundtruth, 'r') as fp:
        who_speaks_when = Annotation()
        for line in fp:
            # do something with file content
            # segment = Segment(..., ...)
            # speaker = ...
            # who_speaks_when[segment] = speaker
            # a loop body made only of comments does not parse, hence the
            # placeholder statement below (remove it once the body is written)
            pass

    return who_speaks_when



 def get_pronounced_names_reference(episode):
    """Return 'pronounced names' reference as pyannote.core.Annotation.

    Opens the groundtruth file returned by getPathToGroundtruth(episode) and
    iterates over its lines. Template function: the per-line parsing is still
    to be written, so the returned annotation is currently empty.

    NOTE(review): this reads the same path as the 'who speaks when' loader;
    confirm that both references are stored in a single file.

    Parameters
    ----------
    episode :
        Episode identifier, forwarded to getPathToGroundtruth.

    Returns
    -------
    pronounced_names : pyannote.core.Annotation
        'Pronounced names' reference.
    """

    pathToGroundtruth = getPathToGroundtruth(episode)
    with open(pathToGroundtruth, 'r') as fp:
        pronounced_names = Annotation()
        for line in fp:
            # do something with file content
            # segment = Segment(..., ...)
            # name = ...
            # pronounced_names[segment] = name
            # a loop body made only of comments does not parse, hence the
            # placeholder statement below (remove it once the body is written)
            pass

    return pronounced_names
    
    
 def get_speaker_diarization_reference(episode):
    """Return 'speaker diarization' reference as pyannote.core.Annotation.

    The diarization is derived from the 'who speaks when' reference of the
    same episode by calling its anonymize_labels() method.
    """
    return get_who_speaks_when_reference(episode).anonymize_labels()
	"""This module defines functions that do the actual work"""

	def time_based_propagation(speaker_diarization, pronounced_names, neighborhood=20.0):
	    """Perform time-based propagation and return a 'who speaks when' hypothesis.

	    Template function: the propagation logic is still to be written, so the
	    returned hypothesis is currently an empty annotation.

	    Parameters
	    ----------
	    speaker_diarization :
	        Speaker diarization to propagate names onto.
	    pronounced_names :
	        Pronounced names to propagate.
	    neighborhood : float, optional
	        Size of the temporal neighborhood. Defaults to 20.0.
	        NOTE(review): presumably expressed in seconds -- confirm.

	    Returns
	    -------
	    hypothesis : pyannote.core.Annotation
	        'Who speaks when' hypothesis.
	    """
	    # Imported here because this module has no top-level import of
	    # Annotation; without it the constructor call below raises NameError.
	    from pyannote.core import Annotation

	    hypothesis = Annotation()
	    # do something with speaker_diarization, pronounced_names and neighborhood
	    return hypothesis


	def speech_turn_based_propagation(speaker_diarization, pronounced_names, neighborhood=1):
	    """Perform turn-based propagation and return a 'who speaks when' hypothesis.

	    Template function: the propagation logic is still to be written, so the
	    returned hypothesis is currently an empty annotation.

	    Parameters
	    ----------
	    speaker_diarization :
	        Speaker diarization to propagate names onto.
	    pronounced_names :
	        Pronounced names to propagate.
	    neighborhood : int, optional
	        Size of the neighborhood. Defaults to 1.
	        NOTE(review): presumably a number of speech turns -- confirm.

	    Returns
	    -------
	    hypothesis : pyannote.core.Annotation
	        'Who speaks when' hypothesis.
	    """
	    # Imported here because this module has no top-level import of
	    # Annotation; without it the constructor call below raises NameError.
	    from pyannote.core import Annotation

	    hypothesis = Annotation()
	    # do something with speaker_diarization, pronounced_names and neighborhood
	    return hypothesis
	"""This module defines function to evaluate performance"""

	from pyannote.metrics.identification import IdentificationErrorRate


	def evaluate(reference, hypothesis):
	    """Evaluate hypothesis provided as pyannote.core.Annotation.

	    Parameters
	    ----------
	    reference :
	        Reference annotation, passed as first argument to the metric.
	    hypothesis :
	        Hypothesis to evaluate.

	    Returns
	    -------
	    The value returned by calling a freshly created IdentificationErrorRate
	    metric on (reference, hypothesis).
	    """
	    # A new metric instance is created on every call, so nothing is
	    # accumulated from one evaluation to the next.
	    identificationErrorRate = IdentificationErrorRate()
	    return identificationErrorRate(reference, hypothesis)
	"""This module is an example of how you could run your experiments"""

	from .approach import time_based_propagation, speech_turn_based_propagation
	from .load import (get_who_speaks_when_reference,
	                   get_pronounced_names_reference,
	                   get_speaker_diarization_reference)
	from .eval import evaluate

	# range(1, 18) visits episodes 1 to 17 (upper bound is exclusive)
	for episode in range(1, 18):

	    # everything that depends only on the episode is loaded once,
	    # before the parameter sweeps below
	    reference = get_who_speaks_when_reference(episode)
	    pronounced_names = get_pronounced_names_reference(episode)
	    speaker_diarization = get_speaker_diarization_reference(episode)

	    # sweep the neighborhood of the time-based approach
	    for neighborhood in [10., 20., 30.]:
	        hypothesis = time_based_propagation(speaker_diarization,
	                                            pronounced_names,
	                                            neighborhood=neighborhood)
	        performance = evaluate(reference, hypothesis)
	        # you could print the performance or save it to a file for later plotting
	        # you could save hypothesis to file to avoid recomputing it later, using 'pyannote.core.json.dump'

	    # sweep the neighborhood of the turn-based approach; the function must
	    # be called under the name it is imported with
	    for neighborhood in [1, 2, 3]:
	        hypothesis = speech_turn_based_propagation(speaker_diarization,
	                                                   pronounced_names,
	                                                   neighborhood=neighborhood)
	        performance = evaluate(reference, hypothesis)
	        # ...
	"""This module defines function to load data from files"""

	from pyannote.core import Segment, Annotation


	def getPathToGroundtruth(episode):
	    """Return path to groundtruth file for episode.

	    Template function: the path construction is still to be written. Until
	    it is, the function fails with an explicit error instead of a NameError
	    on an unassigned variable.

	    Parameters
	    ----------
	    episode :
	        Episode identifier (the experiment script passes integers).

	    Returns
	    -------
	    pathToGroundtruth :
	        Path to the groundtruth file, once the template has been filled in.

	    Raises
	    ------
	    NotImplementedError
	        As long as no path has been assigned below.
	    """
	    # do something...
	    # pathToGroundtruth = ...
	    pathToGroundtruth = None  # placeholder until the line above is filled in
	    if pathToGroundtruth is None:
	        raise NotImplementedError(
	            "getPathToGroundtruth is a template: build the path to the "
	            "groundtruth file of episode %r before calling it" % (episode,))
	    return pathToGroundtruth


	def get_who_speaks_when_reference(episode):
	    """Return 'who speaks when' reference as pyannote.core.Annotation.

	    Opens the groundtruth file returned by getPathToGroundtruth(episode)
	    and iterates over its lines. Template function: the per-line parsing is
	    still to be written, so the returned annotation is currently empty.

	    Parameters
	    ----------
	    episode :
	        Episode identifier, forwarded to getPathToGroundtruth.

	    Returns
	    -------
	    who_speaks_when : pyannote.core.Annotation
	        'Who speaks when' reference.
	    """

	    pathToGroundtruth = getPathToGroundtruth(episode)
	    with open(pathToGroundtruth, 'r') as fp:
	        who_speaks_when = Annotation()
	        for line in fp:
	            # do something with file content
	            # segment = Segment(..., ...)
	            # speaker = ...
	            # who_speaks_when[segment] = speaker
	            # a loop body made only of comments does not parse, hence the
	            # placeholder statement below (remove it once the body is written)
	            pass

	    return who_speaks_when



	def get_pronounced_names_reference(episode):
	    """Return 'pronounced names' reference as pyannote.core.Annotation.

	    Opens the groundtruth file returned by getPathToGroundtruth(episode)
	    and iterates over its lines. Template function: the per-line parsing is
	    still to be written, so the returned annotation is currently empty.

	    NOTE(review): this reads the same path as the 'who speaks when' loader;
	    confirm that both references are stored in a single file.

	    Parameters
	    ----------
	    episode :
	        Episode identifier, forwarded to getPathToGroundtruth.

	    Returns
	    -------
	    pronounced_names : pyannote.core.Annotation
	        'Pronounced names' reference.
	    """

	    pathToGroundtruth = getPathToGroundtruth(episode)
	    with open(pathToGroundtruth, 'r') as fp:
	        pronounced_names = Annotation()
	        for line in fp:
	            # do something with file content
	            # segment = Segment(..., ...)
	            # name = ...
	            # pronounced_names[segment] = name
	            # a loop body made only of comments does not parse, hence the
	            # placeholder statement below (remove it once the body is written)
	            pass

	    return pronounced_names


	def get_speaker_diarization_reference(episode):
	    """Return 'speaker diarization' reference as pyannote.core.Annotation.

	    The diarization is derived from the 'who speaks when' reference of the
	    same episode by calling its anonymize_labels() method.
	    """
	    who_speaks_when = get_who_speaks_when_reference(episode)
	    speaker_diarization = who_speaks_when.anonymize_labels()
	    return speaker_diarization