Skip to content

Instantly share code, notes, and snippets.

@romiras
Created October 4, 2024 23:08
Show Gist options
  • Save romiras/1ddc065dcd71b860f96720460b5f415a to your computer and use it in GitHub Desktop.
Save romiras/1ddc065dcd71b860f96720460b5f415a to your computer and use it in GitHub Desktop.
Convert the JSON response of a YouTube transcript request to Markdown
"""
Parse the JSON response from https://www.youtube.com/youtubei/v1/get_transcript
and convert it to a Markdown document.
"""
import json
import sys
def get_segments(data):
    """Return the ``initialSegments`` value of a parsed transcript response.

    Descends from ``data`` through a fixed chain of keys (taking the first
    entry of ``actions``) and returns whatever is stored at the end of that
    chain. A missing key or index propagates as the usual lookup error
    (``KeyError`` / ``IndexError``).
    """
    # Each step is applied as a subscript, in order, starting from ``data``.
    path = (
        'actions',
        0,
        'updateEngagementPanelAction',
        'content',
        'transcriptRenderer',
        'content',
        'transcriptSearchPanelRenderer',
        'body',
        'transcriptSegmentListRenderer',
        'initialSegments',
    )
    node = data
    for step in path:
        node = node[step]
    return node
def extract_transcript_segments(segments):
    """Yield Markdown fragments for the given transcript segments, in order.

    Args:
        segments: Iterable of segment dicts. ``None`` or an empty sequence
            yields nothing.

    Yields:
        For an entry containing ``transcriptSectionHeaderRenderer``: a
        level-2 Markdown heading surrounded by blank lines.
        For an entry containing ``transcriptSegmentRenderer``: the stripped
        text of each run in its snippet. Runs whose text is empty after
        stripping are skipped.
        Entries with neither key contribute nothing.

    Raises:
        KeyError: If a recognised entry lacks one of the nested keys read
            below.
    """
    # Truthiness check also covers None, which len() would reject.
    if not segments:
        return

    for segment in segments:
        if 'transcriptSectionHeaderRenderer' in segment:
            header = segment['transcriptSectionHeaderRenderer']
            text = header['sectionHeader']['sectionHeaderViewModel']['headline']['content']
            yield f"\n\n## {text}\n\n"

        # Deliberately a separate ``if`` (not ``elif``): an entry carrying
        # both keys produces its heading first and then its text runs.
        if 'transcriptSegmentRenderer' in segment:
            for run in segment['transcriptSegmentRenderer']['snippet']['runs']:
                text = run['text'].strip()
                # Skip blank runs so joining fragments with a separator
                # does not produce doubled separators.
                if text:
                    yield text
def main():
    """Read a transcript JSON document from stdin and write Markdown to stdout."""
    # Parse everything on stdin as one JSON document.
    payload = json.load(sys.stdin)

    # Turn the segments into text fragments and join them with single spaces.
    fragments = extract_transcript_segments(get_segments(payload))
    markdown = ' '.join(fragments)

    # Emit the document followed by a terminating newline.
    sys.stdout.write(markdown + '\n')


if __name__ == "__main__":
    main()
@romiras
Copy link
Author

romiras commented Oct 4, 2024

Usage

python youtube_transcript_to_md.py < transcript.json > transcript.md

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment