mauriciogior · February 22, 2019 18:25
diff --git a/yt-podcast-generator.py b/yt-podcast-generator.py
 # -*- coding: utf-8 -*- 

 from feedgen.feed import FeedGenerator
 from boto.s3.connection import OrdinaryCallingFormat
 from boto.s3.key import Key
 from boto.cloudfront import CloudFrontConnection
 from os import path
 import boto
 import boto.s3
 import sys
 import youtube_dl
 import requests
 import json

 def escape(str):
 	"""Return `str` with the characters &, <, > and " replaced by XML entities.

 	The ampersand is handled first so that the entities produced for the
 	other characters are not escaped a second time.
 	"""
 	substitutions = (
 		("&", "&amp;"),
 		("<", "&lt;"),
 		(">", "&gt;"),
 		("\"", "&quot;"),
 	)
 	for raw, entity in substitutions:
 		str = str.replace(raw, entity)
 	return str

 def percent_cb(complete, total):
     """Progress callback handed to the S3 uploads: print one dot per call.

     Both arguments are accepted only to satisfy the callback signature;
     neither is used. The stream is flushed so each dot shows up at once.
     """
     stream = sys.stdout
     stream.write('.')
     stream.flush()

 # Settings. Every value is a blank placeholder that has to be filled in
 # before the script can run.
 # NOTE(review): secrets are kept directly in this file; consider reading
 # them from the environment so they are never committed - TODO confirm.
 # AWS credentials, passed to both the S3 and the CloudFront connection
 AWS_ACCESS_KEY_ID = ''
 AWS_SECRET_ACCESS_KEY = ''
 AWS_CF_DISTRIBUTION_ID = '' # CloudFront distribution whose /rss.xml gets invalidated (if using)
 # API key sent as the `key` query parameter of the YouTube playlistItems request
 YT_KEY = ''
 # Playlist whose items are turned into podcast episodes
 PLAYLIST_ID = ''

 # Local state lives in ./database.json: "videos" holds the episode records
 # and "index" the video ids that have already been seen.
 if not path.isfile('./database.json'):
 	# First run: start empty and persist that empty state right away
 	database = { "videos" : [], "index" : [] }
 	with open('./database.json', 'w') as outfile:
 		json.dump(database, outfile)
 else:
 	# Later runs: continue from what was stored last time
 	with open('./database.json') as infile:
 		database = json.load(infile)

 # Videos found on this run that were not in the database yet
 newVideos = []

 # playlistItems endpoint. The query string is passed separately so that
 # requests URL-encodes it and the page token can change between calls.
 url = 'https://www.googleapis.com/youtube/v3/playlistItems'
 params = {
 	'playlistId' : PLAYLIST_ID,
 	'key' : YT_KEY,
 	'part' : 'snippet',
 	'maxResults' : 50
 }

 # A single response carries at most 50 items, so keep following
 # nextPageToken until the whole playlist has been read.
 while True:
 	resp = requests.get(url=url, params=params)
 	# Surface API failures as the HTTP error itself instead of a
 	# KeyError on 'items' further down
 	resp.raise_for_status()
 	data = resp.json()

 	# Register every video that is not on the database yet
 	for item in data['items']:
 		snippet = item['snippet']
 		videoId = snippet['resourceId']['videoId']

 		if videoId not in database['index']:
 			# Values stay text (no .encode): bytes cannot be dumped to
 			# JSON on Python 3 and cannot be concatenated with the str
 			# paths/URLs built from videoId later in the script.
 			video = {
 				'videoId' : videoId,
 				'title' : snippet['title'],
 				'description' : snippet['description'],
 				'date' : snippet['publishedAt']
 			}

 			database['index'].insert(0, videoId)
 			database['videos'].insert(0, video)
 			newVideos.insert(0, video)

 	# The last page has no nextPageToken
 	if 'nextPageToken' not in data:
 		break
 	params['pageToken'] = data['nextPageToken']

 # Order the stored videos by their 'date' value, ascending
 database['videos'] = sorted(database['videos'], key=lambda entry: entry['date'])

 # Persist the updated database, replacing the previous file contents
 with open('./database.json', 'w') as outfile:
 	outfile.write(json.dumps(database))

 # Open the S3 bucket (name is a blank placeholder) that receives the files
 bucket_name = ''
 conn = boto.connect_s3(
 	AWS_ACCESS_KEY_ID,
 	AWS_SECRET_ACCESS_KEY,
 	host='s3.sa-east-1.amazonaws.com',
 	calling_format=OrdinaryCallingFormat()
 )
 bucket = conn.get_bucket(bucket_name)

 # For every new video: extract its audio as mp3 unless the file is
 # already on disk, then upload it unless the key already exists in the
 # bucket. Looping over an empty list does nothing, so no length check.
 for video in newVideos:
 	videoId = video['videoId']

 	if not path.isfile('./' + videoId + '.mp3'):
 		print('Downloading video ' + video['title'])

 		# Output file is named after the video id; the audio is
 		# post-processed to mp3 with preferred quality 192
 		ydl_opts = {
 			'format': 'bestaudio/best',
 			'outtmpl': './%(id)s.%(ext)s',
 			'postprocessors': [{
 				'key': 'FFmpegExtractAudio',
 				'preferredcodec': 'mp3',
 				'preferredquality': '192',
 			}],
 		}
 		with youtube_dl.YoutubeDL(ydl_opts) as ydl:
 			ydl.download(['http://www.youtube.com/watch?v=' + videoId])

 	# Upload to s3, skipping objects that are already there
 	k = Key(bucket)
 	k.key = videoId + '.mp3'
 	if k.exists():
 		continue

 	print('Uploading to s3...')
 	k.set_contents_from_filename('./'+ k.key, cb = percent_cb, num_cb = 10)
 	k.set_acl('public-read')

 print('Generating rss file...')

 # Build the feed object and load feedgen's 'podcast' extension, which
 # provides the itunes_* setters used through fg.podcast below.
 # All string values in this section are blank placeholders to fill in.
 fg = FeedGenerator()
 fg.load_extension('podcast')
 # iTunes-specific channel metadata
 fg.podcast.itunes_owner(name='', email='')
 fg.podcast.itunes_author('')
 fg.podcast.itunes_category('Education', 'Higher Education')
 fg.podcast.itunes_explicit('clean')
 fg.podcast.itunes_image('')
 fg.podcast.itunes_subtitle('')
 fg.podcast.itunes_summary(u'')

 # Generic feed metadata
 fg.id('')
 fg.title('')
 fg.author(name='', email='')
 fg.category(term=None, label='', scheme=None)
 fg.contributor(name='', email='')
 # 'alternate' link; 'channelId' in the URL is a placeholder
 fg.link(href='http://youtube.com/channelId', rel='alternate')
 fg.logo('')
 fg.subtitle(u'')
 # 'self' link; NOTE(review): presumably this should be the public URL of
 # the generated rss.xml rather than the channel page - TODO confirm
 fg.link(href='https://youtube.com/channelId', rel='self')
 fg.language('')

 # One feed entry per stored video. The summary is the first 30 characters
 # of the description; the enclosure points at the mp3 named after the
 # video id ('http://path/' is a placeholder base URL, length is given as 0).
 for video in database['videos']:
 	fe = fg.add_entry()

 	episode_id = video['videoId']
 	full_text = video['description']

 	fe.id(episode_id)
 	fe.title(escape(video['title']))
 	fe.summary(escape(full_text[0:30]))
 	fe.description(escape(full_text))
 	fe.enclosure('http://path/' + episode_id + '.mp3', 0, 'audio/mpeg')
 	fe.pubDate(video['date'])

 # Serialize the finished feed to disk
 fg.rss_file('./rss.xml')

 print('Uploading to s3...')

 # Upload the generated rss.xml to the bucket and make it publicly readable
 k = Key(bucket)
 k.key = 'rss.xml'
 k.set_contents_from_filename('./'+ k.key, cb = percent_cb, num_cb = 10)
 k.set_acl('public-read')

 # CloudFront is optional (the distribution id is marked "If using"), so
 # only request an invalidation of /rss.xml when an id has been configured;
 # an empty id would otherwise make the run fail after a successful upload.
 if AWS_CF_DISTRIBUTION_ID:
 	conn = CloudFrontConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
 	print(conn.create_invalidation_request(AWS_CF_DISTRIBUTION_ID, [ '/rss.xml' ]))
	# -*- coding: utf-8 -*-

	from feedgen.feed import FeedGenerator
	from boto.s3.connection import OrdinaryCallingFormat
	from boto.s3.key import Key
	from boto.cloudfront import CloudFrontConnection
	from os import path
	import boto
	import boto.s3
	import sys
	import youtube_dl
	import requests
	import json

	def escape(str):
		"""Return `str` with &, <, > and " replaced by their XML entities.

		The entity names had been decoded out of the replacement strings,
		which turned three substitutions into no-ops and left an unterminated
		string literal on the quote line; they are restored here, together
		with the body indentation. The ampersand is replaced first so the
		entities produced afterwards are not escaped again.
		"""
		str = str.replace("&", "&amp;")
		str = str.replace("<", "&lt;")
		str = str.replace(">", "&gt;")
		str = str.replace("\"", "&quot;")
		return str

	def percent_cb(complete, total):
		"""Progress callback handed to the S3 uploads: print one dot per call.

		Both arguments are accepted only to satisfy the callback signature;
		neither is used. The stream is flushed so each dot appears at once.
		(The body indentation, lost in the paste, is restored.)
		"""
		sys.stdout.write('.')
		sys.stdout.flush()

	# Settings. Every value is a blank placeholder that has to be filled in
	# before the script can run.
	# NOTE(review): secrets are kept directly in this file; consider reading
	# them from the environment so they are never committed - TODO confirm.
	# AWS credentials, passed to both the S3 and the CloudFront connection
	AWS_ACCESS_KEY_ID = ''
	AWS_SECRET_ACCESS_KEY = ''
	AWS_CF_DISTRIBUTION_ID = '' # CloudFront distribution whose /rss.xml gets invalidated (if using)
	# API key sent as the `key` query parameter of the YouTube playlistItems request
	YT_KEY = ''
	# Playlist whose items are turned into podcast episodes
	PLAYLIST_ID = ''

	# Local state lives in ./database.json: "videos" holds the episode records
	# and "index" the video ids that have already been seen.
	# (Nesting of the if/else and with bodies is restored here.)
	database = { "videos" : [], "index" : [] }

	if path.isfile('./database.json'):
		# Continue from what a previous run stored
		with open('./database.json') as infile:
			database = json.load(infile)
	else:
		# First run: write the empty database so the file exists
		with open('./database.json', 'w') as outfile:
			json.dump(database, outfile)

	# Videos found on this run that were not in the database yet
	newVideos = []

	# playlistItems endpoint. The query string is passed separately so that
	# requests URL-encodes it and the page token can change between calls.
	url = 'https://www.googleapis.com/youtube/v3/playlistItems'
	params = {
		'playlistId' : PLAYLIST_ID,
		'key' : YT_KEY,
		'part' : 'snippet',
		'maxResults' : 50
	}

	# A single response carries at most 50 items, so keep following
	# nextPageToken until the whole playlist has been read.
	while True:
		resp = requests.get(url=url, params=params)
		# Surface API failures as the HTTP error itself instead of a
		# KeyError on 'items' further down
		resp.raise_for_status()
		data = resp.json()

		# Register every video that is not on the database yet
		for item in data['items']:
			snippet = item['snippet']
			videoId = snippet['resourceId']['videoId']

			if videoId not in database['index']:
				# Values stay text (no .encode): bytes cannot be dumped to
				# JSON on Python 3 and cannot be concatenated with the str
				# paths/URLs built from videoId later in the script.
				video = {
					'videoId' : videoId,
					'title' : snippet['title'],
					'description' : snippet['description'],
					'date' : snippet['publishedAt']
				}

				database['index'].insert(0, videoId)
				database['videos'].insert(0, video)
				newVideos.insert(0, video)

		# The last page has no nextPageToken
		if 'nextPageToken' not in data:
			break
		params['pageToken'] = data['nextPageToken']

	# Order the stored videos by their 'date' value, ascending
	database['videos'].sort(key=lambda x:x['date'])

	# Persist the updated database, replacing the previous file contents
	# (the with body is indented again so the block parses)
	with open('./database.json', 'w') as outfile:
		json.dump(database, outfile)

	# Open the S3 bucket (name is a blank placeholder) that receives the files
	bucket_name = ''
	conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, host='s3.sa-east-1.amazonaws.com', calling_format=OrdinaryCallingFormat())
	bucket = conn.get_bucket(bucket_name)

	# For every new video: extract its audio as mp3 unless the file is
	# already on disk, then upload it unless the key already exists in the
	# bucket. The loop/if/with nesting lost in the paste is restored; the
	# former length check is dropped because looping over an empty list
	# already does nothing.
	for video in newVideos:
		videoId = video['videoId']

		# Output file is named after the video id; the audio is
		# post-processed to mp3 with preferred quality 192
		ydl_opts = {
			'format': 'bestaudio/best',
			'outtmpl': './%(id)s.%(ext)s',
			'postprocessors': [{
				'key': 'FFmpegExtractAudio',
				'preferredcodec': 'mp3',
				'preferredquality': '192',
			}],
		}

		if not path.isfile('./' + videoId + '.mp3'):
			print('Downloading video ' + video['title'])
			with youtube_dl.YoutubeDL(ydl_opts) as ydl:
				ydl.download(['http://www.youtube.com/watch?v=' + videoId])

		# Upload to s3, skipping objects that are already there
		k = Key(bucket)
		k.key = videoId + '.mp3'

		if not k.exists():
			print('Uploading to s3...')
			k.set_contents_from_filename('./'+ k.key, cb = percent_cb, num_cb = 10)
			k.set_acl('public-read')

	print('Generating rss file...')

	# Build the feed object and load feedgen's 'podcast' extension, which
	# provides the itunes_* setters used through fg.podcast below.
	# All string values in this section are blank placeholders to fill in.
	fg = FeedGenerator()
	fg.load_extension('podcast')
	# iTunes-specific channel metadata
	fg.podcast.itunes_owner(name='', email='')
	fg.podcast.itunes_author('')
	fg.podcast.itunes_category('Education', 'Higher Education')
	fg.podcast.itunes_explicit('clean')
	fg.podcast.itunes_image('')
	fg.podcast.itunes_subtitle('')
	fg.podcast.itunes_summary(u'')

	# Generic feed metadata
	fg.id('')
	fg.title('')
	fg.author(name='', email='')
	fg.category(term=None, label='', scheme=None)
	fg.contributor(name='', email='')
	# 'alternate' link; 'channelId' in the URL is a placeholder
	fg.link(href='http://youtube.com/channelId', rel='alternate')
	fg.logo('')
	fg.subtitle(u'')
	# 'self' link; NOTE(review): presumably this should be the public URL of
	# the generated rss.xml rather than the channel page - TODO confirm
	fg.link(href='https://youtube.com/channelId', rel='self')
	fg.language('')

	# One feed entry per stored video (loop body indentation restored).
	# The summary is the first 30 characters of the description; the
	# enclosure points at the mp3 named after the video id ('http://path/'
	# is a placeholder base URL, length is given as 0).
	for video in database['videos']:
		fe = fg.add_entry()
		fe.id(video['videoId'])
		fe.title(escape(video['title']))
		fe.summary(escape(video['description'][0:30]))
		fe.description(escape(video['description']))
		fe.enclosure('http://path/' + video['videoId'] + '.mp3', 0, 'audio/mpeg')
		fe.pubDate(video['date'])

	# Serialize the finished feed to disk
	fg.rss_file('./rss.xml')

	print('Uploading to s3...')

	# Upload the generated rss.xml to the bucket and make it publicly readable
	k = Key(bucket)
	k.key = 'rss.xml'
	k.set_contents_from_filename('./'+ k.key, cb = percent_cb, num_cb = 10)
	k.set_acl('public-read')

	# CloudFront is optional (the distribution id is marked "If using"), so
	# only request an invalidation of /rss.xml when an id has been configured;
	# an empty id would otherwise make the run fail after a successful upload.
	if AWS_CF_DISTRIBUTION_ID:
		conn = CloudFrontConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
		print(conn.create_invalidation_request(AWS_CF_DISTRIBUTION_ID, [ '/rss.xml' ]))