Skip to content

Instantly share code, notes, and snippets.

@iannase
Created April 8, 2018 22:13
Show Gist options
  • Save iannase/8e801029bdd3e4e52837fe1f7996be31 to your computer and use it in GitHub Desktop.
Save iannase/8e801029bdd3e4e52837fe1f7996be31 to your computer and use it in GitHub Desktop.
Python web scraper that fetches the SoundCloud New & Hot and Top 50 charts and lets you play tracks in a Selenium-controlled browser.
import os
from urllib.parse import quote

import bs4
import requests
from selenium import webdriver
# SoundCloud endpoints used by the scraper, all derived from one base address.
_SOUNDCLOUD = "https://soundcloud.com"
_CHARTS = _SOUNDCLOUD + "/charts"
_SEARCH = _SOUNDCLOUD + "/search"

# Chart landing pages ("top" and "new" variants).
top_url = _CHARTS + "/top"
new_url = _CHARTS + "/new"

# Search pages; the search text is appended directly after "q=".
track_url = _SEARCH + "/sounds?q="
artist_url = _SEARCH + "/people?q="

# Query-string suffix appended to a track search when looking for mixes.
mix_url_end = "&filter.duration=epic"
def build_search_url(base, query, suffix=""):
    """Return a search URL with *query* percent-encoded.

    Args:
        base: URL prefix ending in the query parameter, e.g. ``track_url``.
        query: Raw text typed by the user.
        suffix: Extra query-string text appended after the encoded query.

    Returns:
        ``base`` + encoded ``query`` + ``suffix``.
    """
    # safe="" so that "/", "&", "#" etc. in the user's text cannot break out
    # of the q= parameter; spaces become %20 as the original code intended.
    return base + quote(query, safe="") + suffix


def parse_selection(text, count, offset=0):
    """Turn menu input into a zero-based index, or ``None`` if it is invalid.

    Args:
        text: Raw string returned by ``input()``.
        count: Number of selectable entries.
        offset: Number shown to the user for the first entry (0 or 1).

    Returns:
        An index in ``range(count)``, or ``None`` when *text* is not an
        integer or falls outside the listed entries.
    """
    try:
        index = int(text) - offset
    except ValueError:
        return None
    # Reject negatives explicitly: Python would otherwise index from the end.
    if 0 <= index < count:
        return index
    return None


def fetch_genres(chart_url):
    """Download a chart page and return its genres as ``(label, href)`` pairs."""
    response = requests.get(chart_url)
    soup = bs4.BeautifulSoup(response.text, "lxml")
    # The first two matching anchors are skipped, as in the original script
    # (presumably non-genre navigation links -- TODO confirm against the page).
    anchors = soup.select("a[href*=genre]")[2:]
    return [(anchor.text, anchor.get("href")) for anchor in anchors]


def fetch_tracks(genre_href):
    """Download a genre chart and return its tracks as ``(title, href)`` pairs."""
    response = requests.get("http://soundcloud.com" + genre_href)
    soup = bs4.BeautifulSoup(response.text, "lxml")
    tracks = []
    # The first three <h2> elements are skipped, as in the original script
    # (presumably page headings rather than tracks -- TODO confirm).
    for heading in soup.select("h2")[3:]:
        link = heading.a
        if link is None or not link.get("href"):
            # A heading without a usable link cannot be played; skip it
            # instead of failing on the missing anchor.
            continue
        tracks.append((heading.text, link.get("href")))
    return tracks


def play_tracks(browser, tracks):
    """List *tracks* and open the ones the user picks until they enter 'x'."""
    if not tracks:
        print(">>> No tracks found")
        print()
        return
    for number, (title, _) in enumerate(tracks, start=1):
        print(str(number) + ": " + title)
    print()
    # song selection loop
    while True:
        answer = input(">>> Your choice (x to re-select genre): ")
        print()
        if answer == 'x':
            return
        picked = parse_selection(answer, len(tracks), offset=1)
        if picked is None:
            print(">>> Invalid choice, please try again")
            print()
            continue
        title, href = tracks[picked]
        print("Now playing: " + title)
        print()
        browser.get("http://soundcloud.com" + href)
        print()


def browse_chart(browser, chart_url):
    """Let the user pick genres of one chart until they enter 'x'.

    The chart URL is passed in explicitly so that returning from the track
    list shows the same chart again rather than switching chart type.
    """
    # Fetched once per chart rather than on every pass through the menu.
    genres = fetch_genres(chart_url)
    if not genres:
        print(">>> No genres found")
        print()
        return
    # genre menu
    while True:
        print(">>> Genres Available:")
        print()
        for index, (label, _) in enumerate(genres):
            print(str(index) + ": " + label)
        print()
        answer = input(">>> Your choice (x to re-select chart type): ")
        print()
        if answer == 'x':
            return
        picked = parse_selection(answer, len(genres))
        if picked is None:
            print(">>> Invalid choice, please try again")
            print()
            continue
        play_tracks(browser, fetch_tracks(genres[picked][1]))


def run_menu(browser):
    """Run the main menu loop until the user chooses 0 (exit)."""
    # menu number -> (prompt, URL prefix, URL suffix) for the search options
    searches = {
        1: ("Name of the track: ", track_url, ""),
        2: ("Name of the artist: ", artist_url, ""),
        3: ("Name of the mix: ", track_url, mix_url_end),
    }
    while True:
        print(">>> Menu")
        print(">>> 1 - Search for a track")
        print(">>> 2 - Search for an artist")
        print(">>> 3 - Search for a mix")
        print(">>> 4 - Top charts")
        print(">>> 5 - New & hot charts")
        print(">>> 0 - Exit")
        print()
        choice = parse_selection(input(">>> Your choice: "), 6)
        if choice is None:
            print()
            print(">>> Invalid choice, please try again")
            print()
            continue
        if choice == 0:
            return
        print()
        if choice in searches:
            prompt, base, suffix = searches[choice]
            name = input(prompt)
            print()
            browser.get(build_search_url(base, name, suffix))
            continue
        browse_chart(browser, top_url if choice == 4 else new_url)


def main():
    """Start the browser, show the welcome banner and run the menu."""
    # create the selenium browser
    # The driver location can be overridden with CHROMEDRIVER_PATH; the
    # default is the path the script originally hard-coded.
    # NOTE(review): newer Selenium releases may require a Service object
    # instead of a positional driver path -- confirm against the installed
    # version.
    driver_path = os.environ.get(
        "CHROMEDRIVER_PATH", '/Users/ian/Downloads/chromedriver'
    )
    browser = webdriver.Chrome(driver_path)
    try:
        browser.get("https://soundcloud.com")
        # main menu
        print()
        print(">>> Welcome to the Python Soundcloud Scraper")
        print(">>> Explore the Top / New & Hot Charts for all Genres")
        print(">>> Search Soundcloud for Tracks, Artist, and Mixes")
        print()
        run_menu(browser)
    finally:
        # Close the browser on normal exit and on any error or Ctrl-C.
        browser.quit()
    print()
    print("Goodbye!")
    print()


if __name__ == "__main__":
    main()
@eugenefauntleroy
Copy link

eugenefauntleroy commented Jan 27, 2020 via email

@iannase
Copy link
Author

iannase commented Jan 27, 2020

Are you running the code inside of the Terminal by navigating to the directory the file is located in and using the command "python3 soundscraper.py"?

@eugenefauntleroy
Copy link

eugenefauntleroy commented Jan 28, 2020 via email

@eugenefauntleroy
Copy link

eugenefauntleroy commented Jan 28, 2020 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment