-
-
Save trappedinspacetime/9e8969640938249a5e2c137c32b5dcd3 to your computer and use it in GitHub Desktop.
Python commands to create speaker diarisation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the source audio to WAV first:
#   ffmpeg -i foo.m4a foo.wav
from pyannote.audio import Pipeline

# Load the pretrained diarisation pipeline and run it on the WAV file.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
diarization = pipeline("foo.wav")

# RTTM format https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
with open("foo.rttm", "w") as rttm:
    diarization.write_rttm(rttm)

import pandas as pd

# Read back only the space-separated fields 3, 4 and 7 (0-based) and call them
# tbeg, tdur and stype. tbeg/tdur are treated as seconds further down; stype
# carries the per-turn speaker label used for grouping.
df = pd.read_csv(
    "foo.rttm",
    sep=" ",
    header=None,
    usecols=[3, 4, 7],
    names="tbeg tdur stype".split(),
)
def td_time_format(td: pd.Timedelta) -> str:
    """Format a timedelta as ``M:SS.mmm``.

    Args:
        td: A ``pandas.Timedelta`` (anything exposing a ``components``
            attribute with ``days``, ``hours``, ``minutes``, ``seconds`` and
            ``milliseconds`` fields).

    Returns:
        The offset as total minutes, zero-padded seconds and zero-padded
        milliseconds, e.g. ``"62:05.500"``. Anything finer than a
        millisecond is dropped, not rounded.
    """
    parts = td.components
    # ``components.minutes`` is only the 0-59 remainder, so fold hours and
    # days in; otherwise timestamps wrap around after one hour of audio.
    total_minutes = (parts.days * 24 + parts.hours) * 60 + parts.minutes
    return f"{total_minutes}:{parts.seconds:02}.{parts.milliseconds:03}"
# Add human-readable start and end timestamps for every turn. tbeg and tdur
# are interpreted as seconds; the end of a turn is its start plus its duration.
df["tbeg_fmt"] = pd.to_timedelta(df.tbeg, unit="s").apply(td_time_format)
df["tend_fmt"] = pd.to_timedelta(df.tbeg + df.tdur, unit="s").apply(td_time_format)
import numpy as np

# Get consecutive speaker runs, or single points
# via https://stackoverflow.com/a/71214440/2668831
#
# For each speaker, ``groups`` yields the row labels of that speaker's turns.
# Wherever two neighbouring labels differ by more than 1, another speaker
# spoke in between, so the labels are split there and each run is reduced to
# its [first_row, last_row] pair (a single-row run gives [i, i]).
# Assumes df still has the default 0..n-1 integer index from read_csv.
speaker_runs = {
    speaker: [
        np.array(run)[[0, -1]].tolist()
        for run in np.split(rows, np.where(np.diff(rows) != 1)[0] + 1)
    ]
    for speaker, rows in df.groupby("stype").groups.items()
}
# 'Roll up' the timestamps over consecutive runs by inverting the dict:
# flatten {speaker: [run, ...]} into one {speaker: [first_row, last_row]}
# dict per run, then sort those by their row-index pair so the runs are in
# the order the rows appear in df.
speaker_order = sorted(
    [{speaker: run} for speaker, runs in speaker_runs.items() for run in runs],
    key=lambda d: [*d.values()],
)
# Build one record per run: it starts where the run's first row starts and
# ends where the run's last row ends, so the duration spans the whole run
# (including any gaps between that speaker's consecutive turns).
rollup_records = [
    {
        "tbeg": df.tbeg[start_idx],
        "tdur": df.tbeg[stop_idx] + df.tdur[stop_idx] - df.tbeg[start_idx],
        "stype": df.stype[start_idx],
        "tbeg_fmt": df.tbeg_fmt[start_idx],
        "tend_fmt": df.tend_fmt[stop_idx],
    }
    for order in speaker_order
    # Each dict holds exactly one [first_row, last_row] pair; the speaker key
    # is not needed because the label is read from df itself.
    for start_idx, stop_idx in order.values()
]
# from_records is a DataFrame classmethod; call it on the class rather than
# through the unrelated ``df`` instance.
rollup_df = pd.DataFrame.from_records(rollup_records)
# Optional: swap the generated speaker labels for real names, e.g.
# rollup_df["stype"] = rollup_df.stype.replace("SPEAKER_00", "Name0").replace("SPEAKER_01", "Name1").replace("SPEAKER_02", "Name2").replace("SPEAKER_03", "Name3")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment