Last active
October 5, 2020 21:06
-
-
Save scotchneat/4097289da96a2347e88fcf4261b65307 to your computer and use it in GitHub Desktop.
YouTube Transcript Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# yt-transcript.py -- interactively search a YouTube video's transcript (the video ID is prompted for, not passed as an argument)
from youtube_transcript_api import YouTubeTranscriptApi | |
def search(transcript):
    """Prompt for a search term and print every transcript entry containing it.

    The match is a case-insensitive substring test against each entry's
    ``'text'``. Each hit is printed between rows of asterisks, surrounded by
    up to the requested number of neighbouring entries, and a summary count
    is printed at the end.

    Args:
        transcript: A sequence of dicts, each providing at least the keys
            ``'text'`` and ``'start'``.

    Raises:
        SystemExit: If the user submits an empty search term.
    """
    search_text = input("What do you want to find?: ")
    if not search_text:
        # Raise directly instead of calling the site-provided exit() helper,
        # which is intended for the interactive shell and may be absent.
        raise SystemExit

    raw_context = input("How many context lines (before and after) do you want to show?: ")
    try:
        # Negative counts behave like zero, as they did before.
        context_lines = max(0, int(raw_context))
    except ValueError:
        print("Not a whole number; showing matches without context.")
        context_lines = 0

    needle = search_text.lower()  # hoisted: invariant across the loop
    matches = 0
    for i, item in enumerate(transcript):
        if needle not in item['text'].lower():
            continue

        matches += 1
        print(f"\n-- {matches} " + "-" * 80)

        # Clamp the window start at 0 so a match near the beginning does not
        # wrap around to the end of the transcript via negative indexing.
        for line in transcript[max(0, i - context_lines):i]:
            print(f"Start: {line['start']} :: {line['text']}")

        print("*" * 80)
        print(f"Start: {item['start']} :: {item['text']}")
        print("*" * 80)

        # Slicing stops at the end of the sequence, so a match near the end
        # simply shows fewer trailing lines instead of raising IndexError.
        for line in transcript[i + 1:i + 1 + context_lines]:
            print(f"Start: {line['start']} :: {line['text']}")

    print(f"\n=== Found {matches} match{'' if matches == 1 else 'es'} ===\n\n")
if __name__ == '__main__':
    # Script entry point: fetch the transcript once, then run searches
    # against it repeatedly.
    # Strip stray whitespace that tends to come along with a pasted ID.
    video_id = input("\nEnter the YouTube video ID: ").strip()
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Deliberately unbounded: the loop ends when search() terminates the
    # program, which it does when the user submits an empty search term.
    while True:
        search(transcript)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment