Last active
June 27, 2023 04:59
-
-
Save savchenko/f009a01bba39e8cd5c7f53267071130a to your computer and use it in GitHub Desktop.
pyannote
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "22292baf", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pyannote.core import notebook, Segment\n", | |
"from pyannote.audio import Pipeline\n", | |
"from pyannote.core import Annotation\n", | |
"from os import path, getcwd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "8212d0b3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pipeline_local = Pipeline.from_pretrained(\"config.yaml\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "7d2a0555", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cwd = getcwd()\n", | |
"audio_source = f\"{cwd}/whisper_stereo_test_16b.wav\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0bd3b97a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Run the loaded pipeline on the audio file path defined as `audio_source`.\n", | |
"# This takes quite some time...\n", | |
"audio_recognised = pipeline_local(audio_source)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "25151f22", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sanity-check, visualise results\n", | |
"assert isinstance(audio_recognised, Annotation)\n", | |
"audio_recognised" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"id": "ede4809f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Write one line per speech turn (start, end, speaker label) to\n", | |
"# \"<audio file stem>_labels.txt\" in the current working directory.\n", | |
"labels = path.splitext(path.basename(audio_source))[0] + \"_labels.txt\"\n", | |
"with open(labels, \"w\") as spk_file:\n", | |
"    spk_file.writelines(\n", | |
"        f\"{speech_turn.start:4.1f} {speech_turn.end:4.1f} {speaker}\\n\"\n", | |
"        for speech_turn, _track, speaker in audio_recognised.itertracks(yield_label=True)\n", | |
"    )" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment