edsu · December 14, 2019 14:36
diff --git a/aoty b/aoty
 #!/usr/bin/env python3

 # usage: aoty [year]
 # 
 # This script collects all the albums of the year from Alf's awesome
 # AOTY site http://apps.hubmed.org/aoty and prints out the albums
 # that appear on more than one Album of the Year list.
 #
 # You'll need beautifulsoup4 and requests to run this.

 import sys
 import datetime
 import requests

 from bs4 import BeautifulSoup
 from collections import Counter

 # Year to summarise: the first command-line argument if given, otherwise
 # the current calendar year.
 if len(sys.argv) > 1:
     year = sys.argv[1]
 else:
     year = str(datetime.date.today().year)

 # Tally keyed by "band - album"; every matching <li> found on any page adds
 # one to its album's count.
 counter = Counter()

 # Walk the paginated listing, following rel="next" links until none is left.
 url = 'http://apps.hubmed.org/aoty/' + year + '/'
 while True:
     # A timeout keeps the script from hanging forever on a stalled
     # connection, and the status check stops an error page (e.g. for a year
     # the site does not know) from being parsed as an empty listing.
     try:
         response = requests.get(url, timeout=30)
         response.raise_for_status()
     except requests.RequestException as err:
         sys.exit('aoty: unable to fetch {}: {}'.format(url, err))

     doc = BeautifulSoup(response.text, features="html.parser")
     for li in doc.find_all('li'):
         if li.get('itemtype') != 'http://schema.org/MusicAlbum':
             continue
         names = [a.text.strip() for a in li.find_all('a')]
         if len(names) != 2:
             # The unpacking below needs exactly a band link and an album
             # link; report and skip anything else instead of aborting the
             # whole run with a ValueError.
             print('aoty: skipping unexpected entry: {}'.format(
                 li.text.strip()), file=sys.stderr)
             continue
         band, album = names
         counter['{} - {}'.format(band, album)] += 1

     next_links = doc.select('a[rel="next"]')
     if not next_links:
         break
     # The href is appended to the site root as-is.
     url = 'http://apps.hubmed.org' + next_links[0]['href']

 # Print only the albums counted more than once, most frequent first.
 for name, count in counter.most_common():
     if count > 1:
         print('{: >2} {}'.format(count, name))
	#!/usr/bin/env python3

	# usage: aoty [year]
	#
	# This script collects all the albums of the year from Alf's awesome
	# AOTY site http://apps.hubmed.org/aoty and prints out the albums
	# that appear on more than one Album of the Year list.
	#
	# You'll need beautifulsoup4 and requests to run this.

	import sys
	import datetime
	import requests

	from bs4 import BeautifulSoup
	from collections import Counter

	# Year to summarise: the first command-line argument if given, otherwise
	# the current calendar year.
	if len(sys.argv) > 1:
	    year = sys.argv[1]
	else:
	    year = str(datetime.date.today().year)

	# Tally keyed by "band - album"; every matching <li> found on any page adds
	# one to its album's count.
	counter = Counter()

	# Walk the paginated listing, following rel="next" links until none is left.
	url = 'http://apps.hubmed.org/aoty/' + year + '/'
	while True:
	    # A timeout keeps the script from hanging forever on a stalled
	    # connection, and the status check stops an error page (e.g. for a year
	    # the site does not know) from being parsed as an empty listing.
	    try:
	        response = requests.get(url, timeout=30)
	        response.raise_for_status()
	    except requests.RequestException as err:
	        sys.exit('aoty: unable to fetch {}: {}'.format(url, err))

	    doc = BeautifulSoup(response.text, features="html.parser")
	    for li in doc.find_all('li'):
	        if li.get('itemtype') != 'http://schema.org/MusicAlbum':
	            continue
	        names = [a.text.strip() for a in li.find_all('a')]
	        if len(names) != 2:
	            # The unpacking below needs exactly a band link and an album
	            # link; report and skip anything else instead of aborting the
	            # whole run with a ValueError.
	            print('aoty: skipping unexpected entry: {}'.format(
	                li.text.strip()), file=sys.stderr)
	            continue
	        band, album = names
	        counter['{} - {}'.format(band, album)] += 1

	    next_links = doc.select('a[rel="next"]')
	    if not next_links:
	        break
	    # The href is appended to the site root as-is.
	    url = 'http://apps.hubmed.org' + next_links[0]['href']

	# Print only the albums counted more than once, most frequent first.
	for name, count in counter.most_common():
	    if count > 1:
	        print('{: >2} {}'.format(count, name))