Skip to content

Instantly share code, notes, and snippets.

@dauuricus
Created February 27, 2023 04:11
Show Gist options
  • Save dauuricus/c48b4e2c56db1ecd46b5f45e39592c8f to your computer and use it in GitHub Desktop.
Save dauuricus/c48b4e2c56db1ecd46b5f45e39592c8f to your computer and use it in GitHub Desktop.
!pip install youtube_transcript_api
# -*- coding: utf-8 -*-
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
# URL of the YouTube video to process; `args` is the list of URLs that the
# loop further down iterates over.
urltext = "https://youtu.be/jzTjCgWlFRU"
args = [urltext]
def extract_video_id(url):
    """Return the YouTube video id contained in *url*, or None.

    Recognised URL shapes:
      - https://youtu.be/<id>
      - https://{www.,m.,music.}youtube.com/watch?v=<id>
      - https://{www.,m.,music.}youtube.com/{embed,v,shorts,live}/<id>

    Args:
        url: The URL string to inspect.

    Returns:
        The video id as a string, or None when the host is not a known
        YouTube host or no id can be found (for example a ``/watch`` URL
        without a ``v`` query parameter).
    """
    query = urlparse(url)
    host = query.hostname
    # Non-empty path segments, e.g. '/embed/ID' -> ['embed', 'ID'].
    segments = [part for part in query.path.split('/') if part]

    if host == 'youtu.be':
        return segments[0] if segments else None

    if host in {'www.youtube.com', 'youtube.com',
                'm.youtube.com', 'music.youtube.com'}:
        if query.path == '/watch':
            # .get() instead of ['v'] so a missing/blank parameter yields
            # None rather than raising KeyError.
            values = parse_qs(query.query).get('v')
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in {'embed', 'v', 'shorts', 'live'}:
            return segments[1]

    # Unknown host or unrecognised path.
    return None
def _print_captions(entries):
    """Print one ``time <start> : <text>`` line per caption entry."""
    for entry in entries:
        print(f"time {entry['start']} : {entry['text']}")


# For every URL: print the English captions, then the same captions
# machine-translated to Japanese by YouTube.
for url in args:
    video_id = extract_video_id(url)
    print('youtube video_id:',video_id)
    print()
    if not video_id:
        # Without an id the transcript API cannot be queried; skip this URL.
        print('could not extract a video id from:', url)
        continue

    # NOTE(review): this relies on the static `list_transcripts` API and on
    # caption entries being dicts with 'start'/'text' keys -- confirm that the
    # installed youtube_transcript_api version still provides both.
    transcript = YouTubeTranscriptApi.list_transcripts(video_id)
    english = transcript.find_transcript(['en'])

    # Fetch the English captions once and reuse them; previously the same
    # transcript was downloaded twice (get_transcript + english.fetch()).
    line = list(english.fetch())
    _print_captions(line)
    caption_line = [entry['text'] for entry in line]

    print()
    print()
    print("************************************************************************************")
    print()
    print("Youtube captions")
    print("- - - - - - - - - - - - - - - - - - translated - - - - - - - - - - - - - - - - - - -")
    print()
    print()

    translated = english.translate('ja')
    _print_captions(translated.fetch())  # Japanese
# text_list = []
# for l in line:
# #print("start:", l['start'])
# print(l['text'])
# #print()
# #print("duration:", l['duration'])
# temp1 = l['text']
# temp2 = temp1.rstrip()
# text_list.append(temp2)
# del line
#for text in text_list:
# print(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment