Skip to content

Instantly share code, notes, and snippets.

@mauriciogior
Created February 22, 2019 18:25
Show Gist options
  • Save mauriciogior/9a6b7f05779c4fe81616ba3643232f8b to your computer and use it in GitHub Desktop.
Save mauriciogior/9a6b7f05779c4fe81616ba3643232f8b to your computer and use it in GitHub Desktop.
yt-podcast-generator
# -*- coding: utf-8 -*-
from feedgen.feed import FeedGenerator
from boto.s3.connection import OrdinaryCallingFormat
from boto.s3.key import Key
from boto.cloudfront import CloudFrontConnection
from os import path
import boto
import boto.s3
import sys
import youtube_dl
import requests
import json
def escape(str):
    """Escape the XML/HTML special characters in a piece of text.

    Replaces ``&``, ``<``, ``>`` and ``"`` with their entity references.

    Args:
        str: Text to escape. The name shadows the builtin; it is kept so
            that existing callers are unaffected.

    Returns:
        The text with the four special characters replaced.
    """
    # The ampersand must be handled first; otherwise the "&" introduced by
    # the later replacements would itself be escaped a second time.
    str = str.replace("&", "&amp;")
    str = str.replace("<", "&lt;")
    str = str.replace(">", "&gt;")
    str = str.replace("\"", "&quot;")
    return str
def percent_cb(complete, total):
    """Progress callback that prints a single dot each time it is called.

    Passed as the ``cb`` argument of the S3 uploads in this script. Both
    arguments are accepted to satisfy the callback signature but are not used.

    Args:
        complete: Unused.
        total: Unused.
    """
    stream = sys.stdout
    stream.write('.')
    # Flush right away so the dots show up while the upload is in progress.
    stream.flush()
# --- Settings (all left blank in this file) ---

# YouTube Data API key and the id of the playlist whose items are fetched.
PLAYLIST_ID = ''
YT_KEY = ''

# AWS credentials used for the S3 and CloudFront connections.
AWS_SECRET_ACCESS_KEY = ''
AWS_ACCESS_KEY_ID = ''

# CloudFront distribution whose /rss.xml is invalidated (if using CloudFront).
AWS_CF_DISTRIBUTION_ID = ''
# Start from an empty database; it is replaced below when one exists on disk.
database = {"videos": [], "index": []}

if not path.isfile('./database.json'):
    # No database file yet: write the empty structure so the file exists.
    with open('./database.json', 'w') as db_file:
        json.dump(database, db_file)
else:
    # Pick up the videos and index stored by an earlier run.
    with open('./database.json') as db_file:
        database = json.load(db_file)
# Videos found on this run that were not yet in the database.
newVideos = []

# Ids already stored, kept as a set so each membership test is O(1).
known_ids = set(database['index'])

# Let requests build and encode the query string instead of concatenating it.
url = 'https://www.googleapis.com/youtube/v3/playlistItems'
params = {
    'playlistId': PLAYLIST_ID,
    'key': YT_KEY,
    'part': 'snippet',
    'maxResults': 50,
}

# The API returns at most 50 items per page; follow nextPageToken so that
# playlists longer than one page are processed completely.
while True:
    resp = requests.get(url=url, params=params)
    # Fail loudly on an HTTP error (bad key, quota, unknown playlist) instead
    # of tripping over a missing 'items' field further down.
    resp.raise_for_status()
    data = resp.json()

    # Loop through the page and keep only videos not yet in the database.
    for item in data['items']:
        snippet = item['snippet']
        videoId = snippet['resourceId']['videoId']
        if videoId in known_ids:
            continue

        # Values are kept as text rather than UTF-8 bytes: bytes cannot be
        # written by json.dump on Python 3, and entries reloaded from
        # database.json are text anyway.
        video = {
            'videoId': videoId,
            'title': snippet['title'],
            'description': snippet['description'],
            'date': snippet['publishedAt'],
        }
        known_ids.add(videoId)
        database['index'].insert(0, videoId)
        database['videos'].insert(0, video)
        newVideos.insert(0, video)

    next_page = data.get('nextPageToken')
    if not next_page:
        break
    params['pageToken'] = next_page
# Order the stored videos by their 'date' value (ascending, in place).
videos = database['videos']
videos.sort(key=lambda entry: entry['date'])

# Write the updated database back to disk.
with open('./database.json', 'w') as db_file:
    json.dump(database, db_file)
# Name of the S3 bucket that receives the mp3 files and rss.xml.
bucket_name = ''

# Connect to the sa-east-1 S3 endpoint and look up the bucket.
conn = boto.connect_s3(
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    host='s3.sa-east-1.amazonaws.com',
    calling_format=OrdinaryCallingFormat(),
)
bucket = conn.get_bucket(bucket_name)
# NOTE(review): this copy of the script lost its indentation. Everything from
# here to the end is assumed to sit under the "new videos" guard, because the
# guard would otherwise be redundant with the loop -- confirm against the
# original layout.
if newVideos:
    # The download options are the same for every video, so build them once.
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': './%(id)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }

    # Produce and publish an mp3 for each new video.
    for video in newVideos:
        videoId = video['videoId']
        mp3_name = videoId + '.mp3'

        # Skip the download when the mp3 is already present locally.
        if not path.isfile('./' + mp3_name):
            print('Downloading video ' + video['title'])
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download(['http://www.youtube.com/watch?v=' + videoId])

        # Upload to s3 unless the object is already in the bucket.
        k = Key(bucket)
        k.key = mp3_name
        if not k.exists():
            print('Uploading to s3...')
            # Sending the canned ACL with the upload makes the object public
            # in the same request; a separate set_acl call could be skipped
            # by a crash, leaving the object uploaded but private.
            k.set_contents_from_filename('./' + k.key, cb=percent_cb,
                                         num_cb=10, policy='public-read')

    print('Generating rss file...')

    # Generate feed (channel-level metadata is left blank in this file).
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.podcast.itunes_owner(name='', email='')
    fg.podcast.itunes_author('')
    fg.podcast.itunes_category('Education', 'Higher Education')
    fg.podcast.itunes_explicit('clean')
    fg.podcast.itunes_image('')
    fg.podcast.itunes_subtitle('')
    fg.podcast.itunes_summary(u'')
    fg.id('')
    fg.title('')
    fg.author(name='', email='')
    fg.category(term=None, label='', scheme=None)
    fg.contributor(name='', email='')
    fg.link(href='http://youtube.com/channelId', rel='alternate')
    fg.logo('')
    fg.subtitle(u'')
    fg.link(href='https://youtube.com/channelId', rel='self')
    fg.language('')

    # Add every stored video (not just the new ones) to the feed.
    for video in database['videos']:
        fe = fg.add_entry()
        fe.id(video['videoId'])
        fe.title(escape(video['title']))
        # The summary is the first 30 characters of the description.
        fe.summary(escape(video['description'][0:30]))
        fe.description(escape(video['description']))
        # The enclosure length is always reported as 0.
        fe.enclosure('http://path/' + video['videoId'] + '.mp3', 0, 'audio/mpeg')
        fe.pubDate(video['date'])

    # Write rss file
    fg.rss_file('./rss.xml')

    print('Uploading to s3...')

    # Upload rss to s3, public from the moment it is written.
    k = Key(bucket)
    k.key = 'rss.xml'
    k.set_contents_from_filename('./' + k.key, cb=percent_cb,
                                 num_cb=10, policy='public-read')

    # Invalidate the cached feed only when a CloudFront distribution is
    # configured; the setting is marked "If using" and may be left empty.
    if AWS_CF_DISTRIBUTION_ID:
        # Separate name so the S3 connection in `conn` is not overwritten.
        cf_conn = CloudFrontConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        print(cf_conn.create_invalidation_request(AWS_CF_DISTRIBUTION_ID, ['/rss.xml']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment