Skip to content

Instantly share code, notes, and snippets.

@k5njm
Created January 14, 2019 05:56
Show Gist options
  • Save k5njm/22dc184f80fafd73566dd61942147bb3 to your computer and use it in GitHub Desktop.
Save k5njm/22dc184f80fafd73566dd61942147bb3 to your computer and use it in GitHub Desktop.
Proof of concept AWS Transcribe Diarization Display
from __future__ import print_function
import json
# Proof of concept AWS Transcribe Diarization Display
# Expects "asrOutput.json" to be in the same directory as this file
# https://docs.aws.amazon.com/transcribe/latest/dg/how-it-works.html#how-diarization
# Load the AWS Transcribe job output.
# The file is opened in binary mode on purpose: json.load() then decodes the
# bytes itself (UTF-8 by default) instead of going through the platform's
# locale-dependent text encoding, which can fail or garble non-ASCII
# transcripts on systems whose default encoding is not UTF-8.
# (json.load accepts a binary file on Python 2 and on Python 3.6+.)
with open('asrOutput.json', 'rb') as f:
    data = json.load(f)

# Full transcript text, split on whitespace into individual tokens.
transcript = data['results']['transcripts'][0]['transcript']
wordList = transcript.split()

# Walk the speaker segments in order, consuming len(segment['items']) tokens
# of the transcript for each one.
# NOTE(review): this assumes every segment item corresponds to exactly one
# whitespace-separated token of the transcript -- confirm against real output;
# if the counts ever disagree, the text drifts between speakers.
counter = 0
for segment in data['results']['speaker_labels']['segments']:
    numWords = len(segment['items'])

    # e.g. "spk_0" -> "Speaker 0:"
    speaker = segment['speaker_label'].replace("spk_", "Speaker ") + ":"
    print(speaker)

    segmentText = wordList[counter:counter + numWords]
    # Each word is followed by a single space (including the last one), and
    # the segment is terminated by a blank line -- same output as printing
    # the words one by one with end=" " and then print("\n").
    print("".join(word + " " for word in segmentText) + "\n")

    counter += numWords
from __future__ import print_function
import json
# Proof of concept AWS Transcribe Diarization Display
# Expects "asrOutput.json" to be in the same directory as this file
# https://docs.aws.amazon.com/transcribe/latest/dg/how-it-works.html#how-diarization
# Load the AWS Transcribe job output.
# The file is opened in binary mode on purpose: json.load() then decodes the
# bytes itself (UTF-8 by default) instead of going through the platform's
# locale-dependent text encoding, which can fail or garble non-ASCII
# transcripts on systems whose default encoding is not UTF-8.
# (json.load accepts a binary file on Python 2 and on Python 3.6+.)
with open('asrOutput.json', 'rb') as f:
    data = json.load(f)

# Full transcript text, split on whitespace into individual tokens.
transcript = data['results']['transcripts'][0]['transcript']
wordList = transcript.split()

# Walk the speaker segments in order, consuming len(segment['items']) tokens
# of the transcript for each one.
# NOTE(review): this assumes every segment item corresponds to exactly one
# whitespace-separated token of the transcript -- confirm against real output;
# if the counts ever disagree, the text drifts between speakers.
counter = 0
for segment in data['results']['speaker_labels']['segments']:
    numWords = len(segment['items'])

    # e.g. "spk_0" -> "Speaker 0:"
    speaker = segment['speaker_label'].replace("spk_", "Speaker ") + ":"
    print(speaker)

    segmentText = wordList[counter:counter + numWords]
    # Each word is followed by a single space (including the last one), and
    # the segment is terminated by a blank line -- same output as printing
    # the words one by one with end=" " and then print("\n").
    print("".join(word + " " for word in segmentText) + "\n")

    counter += numWords
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment