Created
October 16, 2024 16:36
-
-
Save CallMeTuesday/2ec5af399bfed2eac3745fa408b81cbc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import getpass
import os
import pickle
import time
from os.path import exists

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
login_url = 'https://bandcamp.com/login' | |
# Update this variable to use your own Bandcamp username | |
username = 'USERNAME' | |
purchase_url = f'https://bandcamp.com/{username}/purchases' | |
cookies_file = 'bandcamp_cookies.pkl' | |
# Function to log in and get session using Selenium | |
def login_to_bandcamp_selenium(email, password, driver): | |
driver.get(login_url) | |
# Enter email and password and submit the form | |
email_field = driver.find_element(By.ID, 'username-field') | |
password_field = driver.find_element(By.ID, 'password-field') | |
email_field.send_keys(email) | |
password_field.send_keys(password) | |
password_field.send_keys(Keys.RETURN) | |
# Wait for login to complete (adjust the sleep time as needed) | |
time.sleep(5) | |
# Check if we successfully logged in by navigating to the purchases page | |
driver.get(purchase_url) | |
if f"{username}/purchases" in driver.current_url: | |
print("Logged in successfully.") | |
# Save cookies to a file | |
with open(cookies_file, 'wb') as f: | |
pickle.dump(driver.get_cookies(), f) | |
return driver | |
else: | |
print("Failed to log in. Please check your credentials or handle any CAPTCHA.") | |
driver.quit() | |
return None | |
# Function to load cookies and start session | |
def load_cookies(driver): | |
if exists(cookies_file): | |
driver.get(login_url) | |
with open(cookies_file, 'rb') as f: | |
cookies = pickle.load(f) | |
for cookie in cookies: | |
driver.add_cookie(cookie) | |
driver.get(purchase_url) | |
time.sleep(5) # Wait for session to load | |
if f"{username}/purchases" in driver.current_url: | |
print("Logged in with cookies successfully.") | |
return True | |
return False | |
# Function to load all purchases by scrolling | |
def load_all_purchases(driver): | |
try: | |
view_all_button = driver.find_element(By.CLASS_NAME, 'view-all-button') | |
view_all_button.click() | |
time.sleep(2) | |
except Exception as e: | |
print(f"View all button not found or could not be clicked: {e}") | |
# Scroll to the bottom of the page to load all purchases | |
last_height = driver.execute_script("return document.body.scrollHeight") | |
while True: | |
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") | |
time.sleep(2) # Wait for new purchases to load | |
new_height = driver.execute_script("return document.body.scrollHeight") | |
if new_height == last_height: | |
break | |
last_height = new_height | |
# Function to scrape purchases | |
def scrape_purchases_selenium(driver): | |
soup = BeautifulSoup(driver.page_source, 'html.parser') | |
purchases = [] | |
# Locate each purchase item | |
for item in soup.find_all('div', class_='purchases-item'): | |
# Extract the title, artist information, and album link | |
title_artist_div = item.find('a', class_='purchases-item-title') | |
if title_artist_div: | |
title = title_artist_div.find('strong').get_text(strip=True) if title_artist_div.find('strong') else 'Unknown' | |
artist = title_artist_div.get_text(strip=True).split('by ', 1)[-1] if 'by ' in title_artist_div.get_text(strip=True) else 'Unknown' | |
album_link = title_artist_div.get('href', None) | |
else: | |
title, artist, album_link = 'Unknown', 'Unknown', None | |
# Extract the purchase date | |
purchase_date_div = item.find('div', class_='purchases-item-date') | |
purchase_date = purchase_date_div.get_text(strip=True) if purchase_date_div else 'Unknown' | |
# Extract the total price | |
price_div = item.find('div', class_='purchases-item-total') | |
price = price_div.find_all('strong')[-1].contents[0].strip() if price_div and price_div.find_all('strong') else 'Unknown' | |
# Add the purchase details to the list | |
purchases.append([title, artist, purchase_date, price, album_link]) | |
return purchases | |
# Write purchases to CSV | |
def save_purchases_to_csv(purchases): | |
with open('bandcamp_purchases.csv', 'w', newline='', encoding='utf-8') as file: | |
writer = csv.writer(file) | |
writer.writerow(['Title', 'Artist', 'Purchase Date', 'Price', 'Link']) | |
for purchase in purchases: | |
title, artist, purchase_date, price, album_link = purchase | |
link = album_link if album_link else 'No Link' | |
writer.writerow([title, artist, purchase_date, price, link]) | |
print("CSV file created successfully.") | |
# Main function | |
if __name__ == "__main__": | |
driver = webdriver.Chrome() # Update with your ChromeDriver path if needed | |
# Try to load cookies | |
if not load_cookies(driver): | |
# If cookies are not available or invalid, ask for credentials and log in manually | |
email = os.getenv("BANDCAMP_EMAIL") | |
password = os.getenv("BANDCAMP_PASSWORD") | |
if not email or not password: | |
email = input("Enter your Bandcamp email: ") | |
import getpass | |
password = getpass.getpass("Enter your Bandcamp password: ") | |
driver = login_to_bandcamp_selenium(email, password, driver) | |
if driver: | |
load_all_purchases(driver) | |
purchases = scrape_purchases_selenium(driver) | |
if purchases: | |
save_purchases_to_csv(purchases) | |
else: | |
print("No purchases found.") | |
driver.quit() | |
else: | |
print("Script terminated due to login failure.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment