Created
April 8, 2018 22:13
-
-
Save iannase/8e801029bdd3e4e52837fe1f7996be31 to your computer and use it in GitHub Desktop.
Python web scraper that gets the new / top 50 charts and also lets you play tracks using the Selenium browser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from urllib.parse import quote

import bs4
import requests
from selenium import webdriver
# SoundCloud endpoints: the two chart pages, the two search prefixes (the
# query text is appended to them), and the suffix that turns a track search
# into a mix search.
top_url = "https://soundcloud.com/charts/top"
new_url = "https://soundcloud.com/charts/new"
track_url = "https://soundcloud.com/search/sounds?q="
artist_url = "https://soundcloud.com/search/people?q="
mix_url_end = "&filter.duration=epic"

# Start the Selenium-driven Chrome instance and land on the home page.
browser = webdriver.Chrome('/Users/ian/Downloads/chromedriver')
browser.get("https://soundcloud.com")

# Welcome banner: a blank line, three headline lines, and a closing blank line.
print(
    "",
    ">>> Welcome to the Python Soundcloud Scraper",
    ">>> Explore the Top / New & Hot Charts for all Genres",
    ">>> Search Soundcloud for Tracks, Artist, and Mixes",
    "",
    sep="\n",
)
def _ask_choice(prompt, valid, allow_back=False):
    """Prompt until the user enters an acceptable menu choice.

    Args:
        prompt: Text shown to the user by ``input``.
        valid: Container of the integers that count as a valid answer.
        allow_back: When true, typing ``x`` is accepted as "go back".

    Returns:
        The chosen integer, or ``None`` if the user typed ``x`` and
        ``allow_back`` is true.
    """
    while True:
        raw = input(prompt).strip()
        if allow_back and raw == 'x':
            return None
        try:
            number = int(raw)
        except ValueError:
            # Non-numeric input: fall through to the "invalid" message
            # instead of crashing the whole session.
            number = None
        if number in valid:
            return number
        print()
        print(">>> Invalid choice, please try again")
        print()


# Main menu: search (tracks / artists / mixes) or browse one of the charts.
while True:
    print(">>> Menu")
    print(">>> 1 - Search for a track")
    print(">>> 2 - Search for an artist")
    print(">>> 3 - Search for a mix")
    print(">>> 4 - Top charts")
    print(">>> 5 - New & hot charts")
    print(">>> 0 - Exit")
    print()

    choice = _ask_choice(">>> Your choice: ", range(6))

    if choice == 0:
        browser.quit()
        break
    print()

    # search for a track
    if choice == 1:
        name = input("Name of the track: ")
        print()
        # Percent-encode the query so spaces, '&', '#', ... survive in the URL.
        browser.get(track_url + quote(name, safe=""))
        continue

    # search for an artist
    if choice == 2:
        name = input("Name of the artist: ")
        print()
        browser.get(artist_url + quote(name, safe=""))
        continue

    # search for a mix (a track search with the long-duration filter appended)
    if choice == 3:
        name = input("Name of the mix: ")
        print()
        browser.get(track_url + quote(name, safe="") + mix_url_end)
        continue

    # Remember the chart type in its own variable: the genre and track menus
    # below read further answers, and must not clobber this selection.
    chart_url = top_url if choice == 4 else new_url

    # genre menu
    while True:
        print(">>> Genres Available:")
        print()

        # parse the chart page with beautiful soup
        request = requests.get(chart_url)
        soup = bs4.BeautifulSoup(request.text, "lxml")

        # every link whose href mentions "genre", minus the first two matches
        genres = soup.select("a[href*=genre]")[2:]

        # print out the available genres and remember their links
        genre_links = []
        for index, genre in enumerate(genres):
            print(str(index) + ": " + genre.text)
            genre_links.append(genre.get("href"))
        print()

        genre_choice = _ask_choice(
            ">>> Your choice (x to re-select chart type): ",
            range(len(genre_links)),
            allow_back=True,
        )
        print()
        if genre_choice is None:
            break

        # fetch the chart for the chosen genre
        url = "http://soundcloud.com" + genre_links[genre_choice]
        request = requests.get(url)
        soup = bs4.BeautifulSoup(request.text, "lxml")

        # track entries are the <h2> elements after the first three
        track_links = []
        track_names = []
        for track in soup.select("h2")[3:]:
            # An <h2> without a link cannot be played; skip it rather than
            # fail on ``None.get``.
            if track.a is None:
                continue
            track_links.append(track.a.get("href"))
            track_names.append(track.text)
            print(str(len(track_names)) + ": " + track.text)
        print()

        # song selection loop (tracks are numbered from 1 on screen)
        while True:
            track_choice = _ask_choice(
                ">>> Your choice (x to re-select genre): ",
                range(1, len(track_names) + 1),
                allow_back=True,
            )
            print()
            if track_choice is None:
                break
            print("Now playing: " + track_names[track_choice - 1])
            print()
            browser.get("http://soundcloud.com" + track_links[track_choice - 1])

print()
print("Goodbye!")
print()
eugenefauntleroy
commented
Jan 28, 2020
via email
Yes, I am. I did forget to mention, the only thing I think I'm doing
different is that I'm using firefox and then geckodriver.
…On Mon, Jan 27, 2020 at 4:12 AM Ian Annase ***@***.***> wrote:
Are you running the code inside of the Terminal by navigating to the
directory the file is located in and using the command "python3
soundscraper.py"?
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/8e801029bdd3e4e52837fe1f7996be31?email_source=notifications&email_token=AMMLO7VKWBGBTSASG5ZHQ3DQ73FRDA5CNFSM4KLV43P2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAGAU56#gistcomment-3156447>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AMMLO7QGJJHGPIS6JQ2FFDTQ73FRDANCNFSM4KLV43PQ>
.
Never mind. It is working inside the terminal! Thanks for your help. By the
way, I've been inspired to do a project since working on this. I'll keep
you posted, going to finish the second part of the tutorial.
Thanks again
…On Tue, Jan 28, 2020 at 2:57 AM Mich Iwata ***@***.***> wrote:
Yes, I am. I did forget to mention, the only thing I think I'm doing
different is that I'm using firefox and then geckodriver.
On Mon, Jan 27, 2020 at 4:12 AM Ian Annase ***@***.***>
wrote:
> Are you running the code inside of the Terminal by navigating to the
> directory the file is located in and using the command "python3
> soundscraper.py"?
>
> —
> You are receiving this because you commented.
> Reply to this email directly, view it on GitHub
> <https://gist.github.com/8e801029bdd3e4e52837fe1f7996be31?email_source=notifications&email_token=AMMLO7VKWBGBTSASG5ZHQ3DQ73FRDA5CNFSM4KLV43P2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAGAU56#gistcomment-3156447>,
> or unsubscribe
> <https://github.com/notifications/unsubscribe-auth/AMMLO7QGJJHGPIS6JQ2FFDTQ73FRDANCNFSM4KLV43PQ>
> .
>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment