Skip to content

Instantly share code, notes, and snippets.

@alexlyzhov
Created January 24, 2023 04:00
Show Gist options
  • Save alexlyzhov/5346148870be66eeb4ef23af86721d08 to your computer and use it in GitHub Desktop.
Save alexlyzhov/5346148870be66eeb4ef23af86721d08 to your computer and use it in GitHub Desktop.
Whisper json processing
# %%
import os
import sys
import json
import datetime
import numpy as np
from tqdm import tqdm
from glob import glob
import argparse
import shutil
# Command-line interface: one positional transcript path plus three
# directories that default to the author's local layout.
parser = argparse.ArgumentParser()
parser.add_argument('path')
parser.add_argument('--input_dir', default='/Users/alexlyzhov/Documents/data/recordings')  # should include [wav], mp3, json
parser.add_argument('--note_dir', default='/Users/alexlyzhov/Obsidian vault/Sources/Voice')  # putting a markdown note here
parser.add_argument('--vault_recordings_dir', default='/Users/alexlyzhov/Obsidian vault/Other/assets_tree/recordings')  # putting mp3 & json here
args = parser.parse_args()

# File name of `path` without directory or extension. os.path.basename is
# used instead of splitting on '/' so the platform's separator is honoured.
filename_stem = os.path.splitext(os.path.basename(args.path))[0]

# Source assets are looked up in input_dir under the same stem.
stem_path = os.path.join(args.input_dir, filename_stem)
json_src_path = stem_path + '.json'
mp3_src_path = stem_path + '.mp3'

# Every output file is named "<today>_<stem>" (local date, YYYY-MM-DD).
date = datetime.datetime.now().strftime('%Y-%m-%d')
date_stem = f'{date}_{filename_stem}'
note_path = os.path.join(args.note_dir, date_stem + '.md')
json_dst_path = os.path.join(args.vault_recordings_dir, date_stem + '.json')
mp3_dst_path = os.path.join(args.vault_recordings_dir, date_stem + '.mp3')
# %%
# Read the transcript JSON given on the command line. The file is opened in
# binary mode and the raw bytes are handed to the JSON decoder.
with open(args.path, 'rb') as transcript_file:
    segments = json.loads(transcript_file.read())
# %%
# Remove overlaps between consecutive segments: walking every segment of
# every group in order, whenever the previous segment ends after the current
# one starts, the previous segment's 'sub_end' is pulled back to the current
# segment's 'sub_start'. The comparison carries across group boundaries.
last_segment = None
for segment in (item for outer_group in segments for item in outer_group):
    assert 'sub_start' in segment
    if last_segment is not None and last_segment['sub_end'] > segment['sub_start']:
        last_segment['sub_end'] = segment['sub_start']
    last_segment = segment
# %%
# Keep only the non-empty groups; the note writer indexes group[0] and group[-1].
segments = [group for group in segments if len(group)]
# %%
def stamp_to_repr(stamp):
    """Render a timestamp given in seconds as '<m>min' or '<h>h<m>min'.

    Hours and minutes are floored. The hour part is omitted when it is zero;
    otherwise the minutes shown are those remaining past the whole hours.
    """
    hours = int(np.floor(stamp / 3600))
    # When hours is 0 the subtraction changes nothing, so this one expression
    # yields the right minute count for both output formats.
    minutes = int(np.floor(stamp / 60 - hours * 60))
    hour_prefix = '' if hours == 0 else f'{hours}h'
    return f'{hour_prefix}{minutes}min'
# %%
def stamps_to_duration_repr(stamp1, stamp2):
    """Return the span from stamp1 to stamp2 in minutes, rounded up, as an int.

    Both stamps are divided by 60 after subtraction, so they are treated as
    being in the same unit (seconds, going by the /60 conversion).
    """
    elapsed_minutes = (stamp2 - stamp1) / 60
    return int(np.ceil(elapsed_minutes))
# %%
# Write the Markdown note: front matter, links to the copied mp3/json, a
# "Comments" heading, then the transcript under "Recording".
# The encoding is pinned so non-ASCII transcript text is written the same way
# regardless of the platform's locale default.
with open(note_path, 'w', encoding='utf-8') as f:
    print(f'---\ncreated: {date}\n---\n\n- [[{date_stem}.mp3]]\n- [[{date_stem}.json]]\n\n# Comments\n\n# Recording', file=f)
    for outer in segments:
        # A group spans from its first segment's start to its last segment's end.
        start_stamp = outer[0]['sub_start']
        end_stamp = outer[-1]['sub_end']
        start_repr = stamp_to_repr(start_stamp)
        duration_repr = stamps_to_duration_repr(start_stamp, end_stamp)
        # Quality score per segment is 1 + avg_logprob; the group's lowest and
        # highest scores are reported scaled by 10 and truncated to int.
        qualities = [1 + segment['avg_logprob'] for segment in outer]
        min_quality = int(min(qualities) * 10)
        max_quality = int(max(qualities) * 10)
        print(f'(start: {start_repr}, duration: {duration_repr}, quality: {min_quality}..{max_quality})', file=f)
        for segment in outer:
            print(segment['text'].strip(), file=f)
        # Blank line after each group.
        print(file=f)
# %%
# Copy the transcript JSON and the mp3 from the input directory into the
# vault under their date-prefixed names (JSON first, then mp3).
for src_path, dst_path in (
    (json_src_path, json_dst_path),
    (mp3_src_path, mp3_dst_path),
):
    shutil.copyfile(src_path, dst_path)
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment