Created
January 14, 2019 05:56
-
-
Save k5njm/22dc184f80fafd73566dd61942147bb3 to your computer and use it in GitHub Desktop.
Proof of concept AWS Transcribe Diarization Display
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import json | |
# Proof of concept AWS Transcribe Diarization Display | |
# Expects "asrOutput.json" to be in the same directory as this file
# https://docs.aws.amazon.com/transcribe/latest/dg/how-it-works.html#how-diarization | |
# Default input: the Transcribe result file, looked up in the current directory.
TRANSCRIPT_PATH = 'asrOutput.json'


def _load_results(path):
    """Return the 'results' object parsed from the JSON file at *path*.

    The file is read as bytes and decoded explicitly as UTF-8 so the result
    does not depend on the platform's default text encoding (this also
    behaves the same on Python 2 and Python 3).
    """
    with open(path, 'rb') as f:
        return json.loads(f.read().decode('utf-8'))['results']


def _speaker_turns(results):
    """Yield (speaker heading, list of words) for each speaker segment.

    Words come from splitting the first transcript on whitespace. Each
    segment consumes as many consecutive words as it has entries in its
    'items' list; pairing is purely by count.
    NOTE(review): this assumes one whitespace-separated token per segment
    item -- confirm against the Transcribe output format.
    """
    words = results['transcripts'][0]['transcript'].split()
    start = 0
    for segment in results['speaker_labels']['segments']:
        end = start + len(segment['items'])
        # "spk_0" -> "Speaker 0:"
        speaker = segment['speaker_label'].replace("spk_", "Speaker ") + ":"
        yield speaker, words[start:end]
        start = end


def main(path=TRANSCRIPT_PATH):
    """Print the transcript at *path* grouped by speaker segment.

    For every segment this prints the speaker heading on its own line, then
    the segment's words each followed by a single space, then a blank line.
    """
    for speaker, words in _speaker_turns(_load_results(path)):
        print(speaker)
        # Every word keeps a trailing space, including the last one.
        print("".join(word + " " for word in words))
        print()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import json | |
# Proof of concept AWS Transcribe Diarization Display | |
# Expects "asrOutput.json" to be in the same directory as this file
# https://docs.aws.amazon.com/transcribe/latest/dg/how-it-works.html#how-diarization | |
# Default input: the Transcribe result file, looked up in the current directory.
TRANSCRIPT_PATH = 'asrOutput.json'


def _load_results(path):
    """Return the 'results' object parsed from the JSON file at *path*.

    The file is read as bytes and decoded explicitly as UTF-8 so the result
    does not depend on the platform's default text encoding (this also
    behaves the same on Python 2 and Python 3).
    """
    with open(path, 'rb') as f:
        return json.loads(f.read().decode('utf-8'))['results']


def _speaker_turns(results):
    """Yield (speaker heading, list of words) for each speaker segment.

    Words come from splitting the first transcript on whitespace. Each
    segment consumes as many consecutive words as it has entries in its
    'items' list; pairing is purely by count.
    NOTE(review): this assumes one whitespace-separated token per segment
    item -- confirm against the Transcribe output format.
    """
    words = results['transcripts'][0]['transcript'].split()
    start = 0
    for segment in results['speaker_labels']['segments']:
        end = start + len(segment['items'])
        # "spk_0" -> "Speaker 0:"
        speaker = segment['speaker_label'].replace("spk_", "Speaker ") + ":"
        yield speaker, words[start:end]
        start = end


def main(path=TRANSCRIPT_PATH):
    """Print the transcript at *path* grouped by speaker segment.

    For every segment this prints the speaker heading on its own line, then
    the segment's words each followed by a single space, then a blank line.
    """
    for speaker, words in _speaker_turns(_load_results(path)):
        print(speaker)
        # Every word keeps a trailing space, including the last one.
        print("".join(word + " " for word in words))
        print()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment