-
-
Save Hammer2900/de6cedb920f1a464b0559f1ccc39b0fe to your computer and use it in GitHub Desktop.
Finds YouTube videos you're interested in. PyCon US talks finder example.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
API_KEY = 'xxx' | |
'''Google API (YouTube Data API v3) key from https://console.developers.google.com/apis/.''' | |
# Put titles you're interested into RELEVANT string: | |
# one title per line. | |
RELEVANT = ''' | |
Trio: Async concurrency for mere mortals | |
Solve Your Problem With Sloppy Python | |
Dataclasses: The code generator to end all code generators | |
Python 3: ten years later | |
''' | |
CHANNEL = 'UCsX05-2sVSH7Nx3zuk3NYuQ' # PyCon US 2018 Channel | |
'''YouTube channel ID here.''' | |
TITLE_POSTFIX = 'PyCon 2018' | |
'''Postfix to strip from titles.''' | |
############################################# | |
_SPACES = re.compile('\s+') | |
_BASE_URL = 'https://www.googleapis.com/youtube/v3/search?order=date&part=snippet&channelId=%(channel)s&maxResults=50&key=%(key)s%(page)s' | |
def traverse(page=0):
    """Yield (video_id, cleaned_title) for every video in CHANNEL.

    Walks all result pages of the YouTube Data API v3 ``search.list``
    endpoint iteratively. (The original recursed per page via
    ``yield from traverse(page=next_page)``, which grows the stack and
    keeps every frame alive for long channels.)

    Title cleanup: TITLE_POSTFIX is removed, a likely speaker-name
    prefix before ' - ' is stripped heuristically, and whitespace runs
    are collapsed to single spaces.

    :param page: pageToken for the first request; falsy means start at
        the first page.
    :raises requests.HTTPError: on a non-2xx API response.
    """
    while True:
        params = {
            'channel': CHANNEL,
            'key': API_KEY,
            # '' on the first request, '&pageToken=...' afterwards.
            'page': '&pageToken=%s' % page if page else '',
        }
        response = requests.get(_BASE_URL % params, timeout=30)
        # Fail loudly on quota/auth errors instead of a KeyError below.
        response.raise_for_status()
        payload = response.json()  # renamed: 'json' shadowed the stdlib module name
        for item in payload['items']:
            # Search results may also contain channels/playlists; keep videos only.
            if item['id']['kind'] != 'youtube#video':
                continue
            video_id = item['id']['videoId']
            title = item['snippet']['title']
            title = title.replace(TITLE_POSTFIX, '').strip(' -')
            split = title.split(' - ', 1)
            prefix = split[0].replace('/', ',')
            # Heuristic: a 2-3 word prefix (or one with a comma, i.e.
            # several speakers) before ' - ' is a person's name -- drop it.
            if len(prefix.split(' ')) in {2, 3} or (',' in prefix):
                if len(split) > 1:
                    title = split[1]
            title = _SPACES.sub(' ', title).strip(' -')
            yield video_id, title
        page = payload.get('nextPageToken')
        if not page:
            return
def find_relevant():
    """Print every channel video (eldest first) with a URL for the wanted ones.

    Output: a numbered list of all traversed titles, where titles that
    appear in RELEVANT get their https://youtu.be/ link; then a summary
    line; then a numbered list of RELEVANT titles that were not found.
    """
    # Normalize the wanted titles the same way traverse() normalizes
    # fetched ones: strip edges, collapse inner whitespace.
    relevant_lines = [
        _SPACES.sub(' ', line.strip())
        for line in RELEVANT.splitlines()
        if line.strip()
    ]
    total_relevant = len(relevant_lines)

    traversed = list(traverse())[::-1]  # eldest first
    for idx, (video_id, title) in enumerate(traversed, 1):
        url = ''
        if title in relevant_lines:
            url = 'https://youtu.be/%s' % video_id
            # Remove matches so the leftovers are reported as missing.
            relevant_lines.remove(title)
        print('%s. %s %s' % (idx, title, url))

    total_missing = len(relevant_lines)
    print(
        '\nSummary: among %s found %s of %s, missing %s\n' % (
            len(traversed),  # the separate running counter was redundant
            total_relevant - total_missing,
            total_relevant,
            total_missing,
        ))
    for idx, line in enumerate(relevant_lines, 1):
        print('%s. %s' % (idx, line))


if __name__ == '__main__':
    find_relevant()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment