Created
February 22, 2019 18:25
-
-
Save mauriciogior/9a6b7f05779c4fe81616ba3643232f8b to your computer and use it in GitHub Desktop.
yt-podcast-generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import json
import sys
from os import path

import boto
import boto.s3
import requests
import youtube_dl
from boto.cloudfront import CloudFrontConnection
from boto.s3.connection import OrdinaryCallingFormat
from boto.s3.key import Key
from feedgen.feed import FeedGenerator
def escape(str):
    """Return *str* with ``&``, ``<``, ``>`` and ``"`` entity-encoded.

    ``&`` is replaced first so that the ampersands introduced by the later
    replacements are not themselves escaped a second time.

    Args:
        str: Text to escape. (The name shadows the builtin; it is kept so
            existing keyword callers continue to work.)

    Returns:
        A new string in which ``&`` becomes ``&amp;``, ``<`` becomes
        ``&lt;``, ``>`` becomes ``&gt;`` and ``"`` becomes ``&quot;``.
    """
    escaped = str.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;")
    escaped = escaped.replace(">", "&gt;")
    escaped = escaped.replace("\"", "&quot;")
    return escaped
def percent_cb(complete, total):
    """Progress callback that prints a single dot every time it is called.

    Used as the ``cb`` argument of the S3 uploads in this script. Both
    arguments are accepted to satisfy the callback signature and are ignored.

    Args:
        complete: Unused.
        total: Unused.
    """
    sys.stdout.write('.')
    # Flush so the dots appear while the upload is still running
    sys.stdout.flush()
# AWS Credentials
# Fill these in before running; avoid committing real values to source control.
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
AWS_CF_DISTRIBUTION_ID = ''  # If using

# Key for the YouTube Data API and the id of the playlist to read
YT_KEY = ''
PLAYLIST_ID = ''
# Initialize database
# "videos" holds one metadata dict per known video, "index" holds their ids.
database = {"videos": [], "index": []}

# Verifies if file exists
if path.isfile('./database.json'):
    # Load our database
    with open('./database.json') as infile:
        database = json.load(infile)
else:
    # Create a database file so later runs find one to load
    with open('./database.json', 'w') as outfile:
        json.dump(database, outfile)
# List of newly uploaded videos
newVideos = []

# Fixed URL; the query is passed via `params` so requests URL-encodes it
url = 'https://www.googleapis.com/youtube/v3/playlistItems'
params = {
    'playlistId': PLAYLIST_ID,
    'key': YT_KEY,
    'part': 'snippet',
    'maxResults': 50,
}

while True:
    resp = requests.get(url=url, params=params)
    # Surface API failures as an HTTP error instead of a KeyError on 'items'
    resp.raise_for_status()
    data = resp.json()

    # Loops through all videos & verifies if they're already on database
    for item in data['items']:
        snippet = item['snippet']
        videoId = snippet['resourceId']['videoId']
        if videoId in database['index']:
            continue

        # Values are stored as text, not .encode()d: bytes cannot be written
        # by json.dump on Python 3 and would not match the text values that
        # json.load returns for entries read back from database.json.
        video = {
            'videoId': videoId,
            'title': snippet['title'],
            'description': snippet['description'],
            'date': snippet['publishedAt'],
        }
        database['index'].insert(0, videoId)
        database['videos'].insert(0, video)
        newVideos.insert(0, video)

    # A page carries at most `maxResults` items; keep following
    # nextPageToken so playlists longer than one page are not truncated.
    page_token = data.get('nextPageToken')
    if not page_token:
        break
    params['pageToken'] = page_token
# Sort database items in ascending order of their stored 'date' value
database['videos'].sort(key=lambda x: x['date'])

# Saves our database file
with open('./database.json', 'w') as outfile:
    json.dump(database, outfile)
# Bucket name for AWS
bucket_name = ''

# Connect to the sa-east-1 S3 endpoint and open the target bucket
conn = boto.connect_s3(
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    host='s3.sa-east-1.amazonaws.com',
    calling_format=OrdinaryCallingFormat()
)
bucket = conn.get_bucket(bucket_name)
# Download options shared by every video: take the best audio stream and
# have the FFmpegExtractAudio post-processor convert it to mp3.
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': './%(id)s.%(ext)s',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
}

# Generates mp3 links for new videos (the loop is a no-op when there are none)
# NOTE(review): the original indentation was lost; the upload step is assumed
# to run for every new video, not only for those downloaded in this run.
for video in newVideos:
    videoId = video['videoId']
    mp3_name = videoId + '.mp3'

    # Skip the download when the mp3 is already present on disk
    if not path.isfile('./' + mp3_name):
        print('Downloading video ' + video['title'])
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download(['http://www.youtube.com/watch?v=' + videoId])

    # Upload to s3, unless the bucket already has an object under this key
    k = Key(bucket)
    k.key = mp3_name
    if not k.exists():
        print('Uploading to s3...')
        k.set_contents_from_filename('./' + k.key, cb=percent_cb, num_cb=10)
        k.set_acl('public-read')
# NOTE(review): the original indentation was lost; the feed is assumed to be
# regenerated on every run rather than only when new videos were found.
print('Generating rss file...')

# Generate feed
fg = FeedGenerator()
fg.load_extension('podcast')

# iTunes / podcast-level metadata (placeholders to be filled in)
fg.podcast.itunes_owner(name='', email='')
fg.podcast.itunes_author('')
fg.podcast.itunes_category('Education', 'Higher Education')
fg.podcast.itunes_explicit('clean')
fg.podcast.itunes_image('')
fg.podcast.itunes_subtitle('')
fg.podcast.itunes_summary(u'')

# Generic feed metadata (placeholders to be filled in)
fg.id('')
fg.title('')
fg.author(name='', email='')
fg.category(term=None, label='', scheme=None)
fg.contributor(name='', email='')
fg.link(href='http://youtube.com/channelId', rel='alternate')
fg.logo('')
fg.subtitle(u'')
fg.link(href='https://youtube.com/channelId', rel='self')
fg.language('')

# Add videos to feed: one entry per video stored in the database
for video in database['videos']:
    fe = fg.add_entry()
    fe.id(video['videoId'])
    fe.title(escape(video['title']))
    # The summary is the first 30 characters of the description
    fe.summary(escape(video['description'][0:30]))
    fe.description(escape(video['description']))
    # The enclosure length is passed as 0; the mp3 size is not looked up
    fe.enclosure('http://path/' + video['videoId'] + '.mp3', 0, 'audio/mpeg')
    fe.pubDate(video['date'])

# Write rss file
fg.rss_file('./rss.xml')
# NOTE(review): the original indentation was lost; this step is assumed to
# run on every invocation of the script.
print('Uploading to s3...')

# Upload rss to s3 and make it publicly readable
k = Key(bucket)
k.key = 'rss.xml'
k.set_contents_from_filename('./' + k.key, cb=percent_cb, num_cb=10)
k.set_acl('public-read')

# Invalidate cloudfront object so the freshly uploaded /rss.xml is served
# (`conn` is rebound here from the S3 connection to the CloudFront one)
conn = CloudFrontConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
print(conn.create_invalidation_request(AWS_CF_DISTRIBUTION_ID, ['/rss.xml']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment