Last active
March 19, 2019 00:07
-
-
Save treyhunner/dfa84e9307c4f532cbab to your computer and use it in GitHub Desktop.
Scour music subreddits for Creative Commons-licensed music on Bandcamp, SoundCloud, and Vimeo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print CC-licensed music from the last month""" | |
import json | |
import requests | |
# URL substrings for links that check_freealbums_top skips silently instead of
# reporting them as "Non-bandcamp URL".
banned_strings = [
    "blemmed.com",
    "cuterthanpie.com",
    "youtu",
    "www.reddit.com",
    "npr.org",
    "imgur.com",
    "datapiff",
    # NOTE(review): the mixtape host's domain appears to be spelled
    # "datpiff.com", which the entry above never matches; the original
    # entry is kept so nothing that matched before stops matching.
    "datpiff",
    "grooveshark.com",
    "noisetrade.com",
    "lowerfrequencies.com",
    "pinballsessions.com",
    "reverbnation.com",
    "pitchfork.com",
    "spotify.com",
    "codehookup",
]
def print_cc_url(url):
    """Print a line announcing that CC-licensed music was found at ``url``.

    Args:
        url: The link to report; interpolated into the message with ``%s``.
    """
    # Parenthesised single-argument form: behaves the same as the Python 2
    # print statement and is also valid on Python 3.
    print("CC music at %s" % url)
def check_bandcamp_url(url):
    """Print URL if CC music was found.

    Downloads the page at ``url`` and reports it via ``print_cc_url`` when
    the page contains the phrase "some rights reserved".

    Args:
        url: Address of the page to download and inspect.
    """
    response = requests.get(url)
    # Search the decoded text, not response.content: content is bytes on
    # Python 3, where testing a str literal against it raises TypeError.
    if "some rights reserved" in response.text:
        print_cc_url(url)
def check_soundcloud_url(url):
    """Print URL if CC music was found.

    Downloads the page at ``url`` and reports it via ``print_cc_url`` when
    the page contains the marker "License: cc-".

    Args:
        url: Address of the page to download and inspect.
    """
    response = requests.get(url)
    # Search the decoded text, not response.content: content is bytes on
    # Python 3, where testing a str literal against it raises TypeError.
    if "License: cc-" in response.text:
        print_cc_url(url)
def check_vimeo_url(url):
    """Print URL if CC video was found.

    Downloads the page at ``url`` and reports it via ``print_cc_url`` when
    the page markup contains a ``rel="license"`` attribute.

    Args:
        url: Address of the page to download and inspect.
    """
    response = requests.get(url)
    # Search the decoded text, not response.content: content is bytes on
    # Python 3, where testing a str literal against it raises TypeError.
    if 'rel="license"' in response.text:
        print_cc_url(url)
def check_freealbums_top(subreddit, after=None):
    """Scan a subreddit's top links of the month for CC-licensed music.

    Requests ``/r/<subreddit>/top.json`` from reddit 100 links at a time and
    keeps following the listing's ``after`` cursor until it comes back empty.
    Each link URL is dispatched on a substring match:

    * ``bandcamp``, ``soundcloud.com`` and ``vimeo.com`` links are downloaded
      and checked by the matching ``check_*_url`` function;
    * ``jamendo`` and ``freemusicarchive`` links are reported directly with
      ``print_cc_url``;
    * any other link is printed as "Non-bandcamp URL" unless it contains one
      of ``banned_strings``.

    Args:
        subreddit: Name placed in the ``/r/<name>/`` part of the listing URL.
        after: Optional listing cursor to resume from; ``None`` starts at
            the first page.

    Raises:
        requests.HTTPError: If reddit answers a listing request with an
            error status (via ``raise_for_status``).
    """
    base_url = 'http://www.reddit.com/r/{}/top.json?limit=100&t=month'.format(
        subreddit)
    # Walk the pages in a loop rather than recursing once per page, so the
    # call stack stays flat however many pages the listing has.
    while True:
        top_url = base_url
        if after:
            top_url += '&after=' + after
        response = requests.get(top_url,
                                headers={'User-Agent': "treyhunner"})
        response.raise_for_status()
        listing = response.json()['data']
        links = listing['children']
        print("%s URLs found" % len(links))
        for link in links:
            url = link['data']['url']
            if 'bandcamp' in url:
                check_bandcamp_url(url)
            elif 'soundcloud.com' in url:
                check_soundcloud_url(url)
            elif 'vimeo.com' in url:
                check_vimeo_url(url)
            elif 'jamendo' in url or 'freemusicarchive' in url:
                print_cc_url(url)
            elif not any(s in url for s in banned_strings):
                print("Non-bandcamp URL: %s" % url)
        # A falsy cursor means there is no further page to request.
        after = listing['after']
        if not after:
            break
if __name__ == "__main__":
    # Subreddits to scan. The names are joined with "+" and passed to
    # check_freealbums_top as a single subreddit argument.
    subreddit_names = [
        "RepublicOfMusic",
        "listentous",
        "unheardof",
        "FreeAlbums",
        "MusicAlbums",
        "freemusic",
        "altrap",
        "indiewok",
    ]
    subreddit = "+".join(subreddit_names)
    check_freealbums_top(subreddit)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment