Last active
November 7, 2023 21:37
-
-
Save lmmx/0970a01295e12531f6a3f0ac5537e0b8 to your computer and use it in GitHub Desktop.
Python commands to create speaker diarisation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the recording to WAV first, e.g.:
#   ffmpeg -i foo.m4a foo.wav
from pyannote.audio import Pipeline

# Load the pretrained diarisation pipeline and run it on the WAV file.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
diarization = pipeline("foo.wav")

# Write the result to disk in RTTM format:
# https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
with open("foo.rttm", "w") as rttm:
    diarization.write_rttm(rttm)
import pandas as pd

# Load only the RTTM fields used below (space-separated, no header row):
# field 3 -> "tbeg" and field 4 -> "tdur" (both treated as seconds further
# down), field 7 -> "stype", which holds the speaker label (e.g. SPEAKER_00).
df = pd.read_csv(
    "foo.rttm",
    sep=" ",
    header=None,
    usecols=[3, 4, 7],
    names=["tbeg", "tdur", "stype"],
)
def td_time_format(td):
    """Format a timedelta as ``M:SS.mmm``.

    Args:
        td: An object exposing ``.components`` with ``days``, ``hours``,
            ``minutes``, ``seconds`` and ``milliseconds`` fields
            (e.g. ``pandas.Timedelta``).

    Returns:
        A string such as ``"1:05.250"``. The minutes field is the *total*
        number of minutes, so offsets of an hour or more do not wrap
        (3725.5 s -> ``"62:05.500"``).
    """
    parts = td.components
    # Fold days and hours into the minutes field; using ``parts.minutes`` alone
    # silently discards them for recordings longer than an hour.
    total_minutes = (parts.days * 24 + parts.hours) * 60 + parts.minutes
    return f"{total_minutes}:{parts.seconds:02}.{parts.milliseconds:03}"
# Add human-readable start and end times for every row
# (end = start + duration, both interpreted as seconds).
df["tbeg_fmt"] = pd.to_timedelta(df["tbeg"], unit="s").apply(td_time_format)
df["tend_fmt"] = pd.to_timedelta(
    df["tbeg"] + df["tdur"], unit="s"
).apply(td_time_format)
import numpy as np

# Get consecutive speaker runs, or single points
# via https://stackoverflow.com/a/71214440/2668831
#
# Result: {speaker label: [[first_row, last_row], ...]}, where each pair
# delimits a stretch of adjacent rows of ``df`` that all belong to that
# speaker (a single isolated row yields first_row == last_row).
speaker_runs = {}
for speaker, row_labels in df.groupby("stype").groups.items():
    # Work on a plain ndarray so np.diff / np.split / fancy indexing behave
    # uniformly regardless of the pandas Index type.
    rows = np.asarray(row_labels)
    # A gap (difference != 1) between neighbouring row labels ends a run;
    # +1 turns "position before the gap" into "position where the next run starts".
    breaks = np.where(np.diff(rows) != 1)[0] + 1
    speaker_runs[speaker] = [
        run[[0, -1]].tolist() for run in np.split(rows, breaks)
    ]
# 'Roll up' the timestamps over consecutive runs by inverting the dict:
# flatten to one single-entry {speaker: [start_idx, stop_idx]} dict per run,
# then order the runs by their row positions.
speaker_order = sorted(
    (
        {speaker: run}
        for speaker, runs in speaker_runs.items()
        for run in runs
    ),
    # Each dict has exactly one value, so this compares [[start_idx, stop_idx]].
    key=lambda entry: list(entry.values()),
)
# One record per run: it starts where the run's first row starts and ends
# where the run's last row ends.
rollup_records = [
    {
        "tbeg": df.loc[start_idx, "tbeg"],
        # Span from the first row's start to the last row's end.
        "tdur": (
            df.loc[stop_idx, "tbeg"]
            + df.loc[stop_idx, "tdur"]
            - df.loc[start_idx, "tbeg"]
        ),
        "stype": df.loc[start_idx, "stype"],
        "tbeg_fmt": df.loc[start_idx, "tbeg_fmt"],
        "tend_fmt": df.loc[stop_idx, "tend_fmt"],
    }
    for order in speaker_order
    for start_idx, stop_idx in order.values()
]
# Build the rolled-up table from the per-run records. ``from_records`` is a
# classmethod, so call it on the class rather than on the unrelated ``df``.
rollup_df = pd.DataFrame.from_records(rollup_records)
# Optionally replace the diarisation labels with real names, e.g.:
# rollup_df["stype"] = rollup_df.stype.replace("SPEAKER_00", "Name0").replace("SPEAKER_01", "Name1").replace("SPEAKER_02", "Name2").replace("SPEAKER_03", "Name3")
Hey, heads up: you included your auth token there. You might want to delete the comment and repost!
Actually not mine, I got it from a space I copied some of the code from. I must remember to sort it in my space. Not sure yet how to work with secrets in HF spaces.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This approach seems simpler and works well too: