Created
January 24, 2023 04:00
-
-
Save alexlyzhov/5346148870be66eeb4ef23af86721d08 to your computer and use it in GitHub Desktop.
Whisper json processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% | |
import os | |
import sys | |
import json | |
import datetime | |
import numpy as np | |
from tqdm import tqdm | |
from glob import glob | |
import argparse | |
import shutil | |
# Command-line interface: the positional argument is the JSON file that gets
# loaded as the transcript; the options locate the source recordings and the
# destination folders for the note and the archived files.
parser = argparse.ArgumentParser()
parser.add_argument('path', help='JSON file with the transcript segments to process')
parser.add_argument(
    '--input_dir',
    default='/Users/alexlyzhov/Documents/data/recordings',
    help='directory with the source recordings; should include [wav], mp3, json',
)
parser.add_argument(
    '--note_dir',
    default='/Users/alexlyzhov/Obsidian vault/Sources/Voice',
    help='directory where the markdown note is written',
)
parser.add_argument(
    '--vault_recordings_dir',
    default='/Users/alexlyzhov/Obsidian vault/Other/assets_tree/recordings',
    help='directory where the mp3 and json copies are put',
)
args = parser.parse_args()

# File name without directories or extension. os.path.basename() honours the
# platform's path separator, whereas splitting on '/' only works on POSIX.
filename_stem = os.path.splitext(os.path.basename(args.path))[0]

# Source files are looked up by stem inside --input_dir.
stem_path = os.path.join(args.input_dir, filename_stem)
json_src_path = stem_path + '.json'
mp3_src_path = stem_path + '.mp3'

# Destination names are prefixed with today's date (local time).
date = datetime.datetime.now().strftime('%Y-%m-%d')
date_stem = f'{date}_{filename_stem}'
note_path = os.path.join(args.note_dir, date_stem + '.md')
json_dst_path = os.path.join(args.vault_recordings_dir, date_stem + '.json')
mp3_dst_path = os.path.join(args.vault_recordings_dir, date_stem + '.mp3')
# %% | |
# Load the transcript: a list of groups, each group being a list of segment
# dicts that carry at least 'sub_start' and 'sub_end'.
with open(args.path, 'rb') as f:
    segments = json.load(f)
# %%
# Clip overlapping subtitles: whenever a segment starts before the previous
# one has ended, the previous segment's end is pulled back to that start.
# The comparison runs across group boundaries as well.
last_segment = None
for outer in segments:
    for segment in outer:
        # Raise explicitly instead of using assert, which is stripped when
        # Python runs with -O and would let malformed input through.
        if 'sub_start' not in segment:
            raise ValueError(f"segment is missing 'sub_start': {segment!r}")
        if last_segment is not None and last_segment['sub_end'] > segment['sub_start']:
            last_segment['sub_end'] = segment['sub_start']
        last_segment = segment
# %%
# Drop empty groups so later code can index outer[0] and outer[-1] safely.
segments = [outer for outer in segments if len(outer)]
# %% | |
def stamp_to_repr(stamp):
    """Format a timestamp given in seconds as a short hours/minutes label.

    Values below one hour are rendered as '<M>min'; anything longer is
    rendered as '<H>h<M>min', where M is the number of whole minutes past
    the hour. Seconds are discarded (rounded down).
    """
    hours = int(np.floor(stamp / 3600))
    if hours:
        # Minutes remaining once the whole hours are taken out.
        minutes = int(np.floor(stamp / 60 - hours * 60))
        return f'{hours}h{minutes}min'
    total_minutes = int(np.floor(stamp / 60))
    return f'{total_minutes}min'
# %% | |
def stamps_to_duration_repr(stamp1, stamp2):
    """Return the time between two timestamps in whole minutes, rounded up.

    Both stamps are in seconds. The result is a plain int with no unit
    suffix.
    """
    elapsed_minutes = (stamp2 - stamp1) / 60
    return int(np.ceil(elapsed_minutes))
# %% | |
# Write the Markdown note: front matter, links to the archived mp3/json, and
# then one block per group of segments (a header line followed by the text of
# each segment and a blank line).
# The encoding is set explicitly so non-ASCII transcript text is written the
# same way regardless of the platform's default encoding.
with open(note_path, 'w', encoding='utf-8') as f:
    print(f'---\ncreated: {date}\n---\n\n- [[{date_stem}.mp3]]\n- [[{date_stem}.json]]\n\n# Comments\n\n# Recording', file=f)
    for outer in segments:
        start_stamp = outer[0]['sub_start']
        end_stamp = outer[-1]['sub_end']
        start_repr = stamp_to_repr(start_stamp)
        duration_repr = stamps_to_duration_repr(start_stamp, end_stamp)
        # Quality score per segment is 1 + avg_logprob; the header shows the
        # lowest and highest score in the group, scaled by 10 and truncated
        # to an int. Computed once instead of rebuilding the list per bound.
        qualities = [1 + segment['avg_logprob'] for segment in outer]
        min_quality = int(min(qualities) * 10)
        max_quality = int(max(qualities) * 10)
        print(f'(start: {start_repr}, duration: {duration_repr}, quality: {min_quality}..{max_quality})', file=f)
        for segment in outer:
            print(segment['text'].strip(), file=f)
        # Blank line separates consecutive groups.
        print(file=f)
# %% | |
# Copy the source JSON and MP3 to their dated destination paths, JSON first.
for src_path, dst_path in (
    (json_src_path, json_dst_path),
    (mp3_src_path, mp3_dst_path),
):
    shutil.copyfile(src_path, dst_path)
# %% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment