-
-
Save Hammer2900/de6cedb920f1a464b0559f1ccc39b0fe to your computer and use it in GitHub Desktop.
Finds YouTube videos you're interested in. PyCon US talks finder example.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
API_KEY = 'xxx' | |
'''Google API (YouTube Data API v3) key from https://console.developers.google.com/apis/.''' | |
# Put titles you're interested into RELEVANT string: | |
# one title per line. | |
RELEVANT = ''' | |
Trio: Async concurrency for mere mortals | |
Solve Your Problem With Sloppy Python | |
Dataclasses: The code generator to end all code generators | |
Python 3: ten years later | |
''' | |
CHANNEL = 'UCsX05-2sVSH7Nx3zuk3NYuQ' # PyCon US 2018 Channel | |
'''YouTube channel ID here.''' | |
TITLE_POSTFIX = 'PyCon 2018' | |
'''Postfix to strip from titles.''' | |
############################################# | |
_SPACES = re.compile('\s+') | |
_BASE_URL = 'https://www.googleapis.com/youtube/v3/search?order=date&part=snippet&channelId=%(channel)s&maxResults=50&key=%(key)s%(page)s' | |
def traverse(page=0):
    """Yield (video_id, cleaned_title) for every video in CHANNEL.

    Walks all result pages of the YouTube Data API v3 ``search.list``
    endpoint iteratively. (The original recursed per page via
    ``yield from traverse(page=next_page)``, which grows the stack and
    keeps every frame alive for long channels.)

    Title cleanup: TITLE_POSTFIX is removed, a likely speaker-name
    prefix before ' - ' is stripped heuristically, and whitespace runs
    are collapsed to single spaces.

    :param page: pageToken for the first request; falsy means start at
        the first page.
    :raises requests.HTTPError: on a non-2xx API response.
    """
    while True:
        params = {
            'channel': CHANNEL,
            'key': API_KEY,
            # '' on the first request, '&pageToken=...' afterwards.
            'page': '&pageToken=%s' % page if page else '',
        }
        response = requests.get(_BASE_URL % params, timeout=30)
        # Fail loudly on quota/auth errors instead of a KeyError below.
        response.raise_for_status()
        payload = response.json()  # renamed: 'json' shadowed the stdlib module name
        for item in payload['items']:
            # Search results may also contain channels/playlists; keep videos only.
            if item['id']['kind'] != 'youtube#video':
                continue
            video_id = item['id']['videoId']
            title = item['snippet']['title']
            title = title.replace(TITLE_POSTFIX, '').strip(' -')
            split = title.split(' - ', 1)
            prefix = split[0].replace('/', ',')
            # Heuristic: a 2-3 word prefix (or one with a comma, i.e.
            # several speakers) before ' - ' is a person's name -- drop it.
            if len(prefix.split(' ')) in {2, 3} or (',' in prefix):
                if len(split) > 1:
                    title = split[1]
            title = _SPACES.sub(' ', title).strip(' -')
            yield video_id, title
        page = payload.get('nextPageToken')
        if not page:
            return
def find_relevant():
    """Print every channel video (eldest first) with a URL for the wanted ones.

    Output: a numbered list of all traversed titles, where titles that
    appear in RELEVANT get their https://youtu.be/ link; then a summary
    line; then a numbered list of RELEVANT titles that were not found.
    """
    # Normalize the wanted titles the same way traverse() normalizes
    # fetched ones: strip edges, collapse inner whitespace.
    relevant_lines = [
        _SPACES.sub(' ', line.strip())
        for line in RELEVANT.splitlines()
        if line.strip()
    ]
    total_relevant = len(relevant_lines)

    traversed = list(traverse())[::-1]  # eldest first
    for idx, (video_id, title) in enumerate(traversed, 1):
        url = ''
        if title in relevant_lines:
            url = 'https://youtu.be/%s' % video_id
            # Remove matches so the leftovers are reported as missing.
            relevant_lines.remove(title)
        print('%s. %s %s' % (idx, title, url))

    total_missing = len(relevant_lines)
    print(
        '\nSummary: among %s found %s of %s, missing %s\n' % (
            len(traversed),  # the separate running counter was redundant
            total_relevant - total_missing,
            total_relevant,
            total_missing,
        ))
    for idx, line in enumerate(relevant_lines, 1):
        print('%s. %s' % (idx, line))


if __name__ == '__main__':
    find_relevant()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment