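"""Scrape a Bandcamp account's purchase history into a CSV file.

Uses Selenium to log in (reusing pickled cookies when available), clicks the
"view all" button and scrolls until every purchase has loaded, parses the
page with BeautifulSoup, and writes Title/Artist/Purchase Date/Price/Link
rows to bandcamp_purchases.csv.
"""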
import csv
import getpass
import os
import pickle
import time
from os.path import exists

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

login_url = 'https://bandcamp.com/login'
# Update this variable to use your own Bandcamp username
username = 'USERNAME'
purchase_url = f'https://bandcamp.com/{username}/purchases'
cookies_file = 'bandcamp_cookies.pkl'
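
# Cookies are pickled so later runs can reuse the authenticated session
# instead of logging in again (see load_cookies below).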

# Function to log in and get a session using Selenium
def login_to_bandcamp_selenium(email, password, driver):
    driver.get(login_url)

    # Enter email and password and submit the form
    email_field = driver.find_element(By.ID, 'username-field')
    password_field = driver.find_element(By.ID, 'password-field')
    email_field.send_keys(email)
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)

    # Wait for login to complete (adjust the sleep time as needed)
    time.sleep(5)

    # Check whether we logged in successfully by navigating to the purchases page
    driver.get(purchase_url)
    if f"{username}/purchases" in driver.current_url:
        print("Logged in successfully.")
        # Save cookies to a file for reuse on the next run
        with open(cookies_file, 'wb') as f:
            pickle.dump(driver.get_cookies(), f)
        return driver
    else:
        print("Failed to log in. Please check your credentials or handle any CAPTCHA.")
        driver.quit()
        return None

# Function to load saved cookies and restore the session
def load_cookies(driver):
    if exists(cookies_file):
        # Selenium only accepts cookies for the current domain, so open the
        # site before adding them
        driver.get(login_url)
        with open(cookies_file, 'rb') as f:
            cookies = pickle.load(f)
        for cookie in cookies:
            driver.add_cookie(cookie)
        driver.get(purchase_url)
        time.sleep(5)  # Wait for the session to load
        if f"{username}/purchases" in driver.current_url:
            print("Logged in with cookies successfully.")
            return True
    return False

# Function to load all purchases by expanding the list and scrolling
def load_all_purchases(driver):
    try:
        view_all_button = driver.find_element(By.CLASS_NAME, 'view-all-button')
        view_all_button.click()
        time.sleep(2)
    except Exception as e:
        print(f"View all button not found or could not be clicked: {e}")

    # Scroll to the bottom repeatedly; stop once the page height no longer
    # grows, meaning no more purchases are being lazy-loaded
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for new purchases to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

# Function to scrape purchase details from the fully loaded page
def scrape_purchases_selenium(driver):
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    purchases = []

    # Locate each purchase item
    for item in soup.find_all('div', class_='purchases-item'):
        # Extract the title, artist, and album link; the anchor text has the
        # form "<strong>Title</strong> by Artist"
        title_artist_div = item.find('a', class_='purchases-item-title')
        if title_artist_div:
            strong = title_artist_div.find('strong')
            title = strong.get_text(strip=True) if strong else 'Unknown'
            full_text = title_artist_div.get_text(strip=True)
            artist = full_text.split('by ', 1)[-1] if 'by ' in full_text else 'Unknown'
            album_link = title_artist_div.get('href', None)
        else:
            title, artist, album_link = 'Unknown', 'Unknown', None

        # Extract the purchase date
        purchase_date_div = item.find('div', class_='purchases-item-date')
        purchase_date = purchase_date_div.get_text(strip=True) if purchase_date_div else 'Unknown'

        # Extract the total price (the amount is the last <strong> in the total div)
        price_div = item.find('div', class_='purchases-item-total')
        price = price_div.find_all('strong')[-1].contents[0].strip() if price_div and price_div.find_all('strong') else 'Unknown'

        # Add the purchase details to the list
        purchases.append([title, artist, purchase_date, price, album_link])

    return purchases

# Write purchases to CSV
def save_purchases_to_csv(purchases):
    with open('bandcamp_purchases.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Title', 'Artist', 'Purchase Date', 'Price', 'Link'])
        for purchase in purchases:
            title, artist, purchase_date, price, album_link = purchase
            link = album_link if album_link else 'No Link'
            writer.writerow([title, artist, purchase_date, price, link])
    print("CSV file created successfully.")

# Main entry point
if __name__ == "__main__":
    driver = webdriver.Chrome()  # Update with your ChromeDriver path if needed

    # Try to reuse a saved session; fall back to a fresh login
    if not load_cookies(driver):
        # Prefer credentials from the environment, then prompt interactively
        email = os.getenv("BANDCAMP_EMAIL")
        password = os.getenv("BANDCAMP_PASSWORD")
        if not email or not password:
            email = input("Enter your Bandcamp email: ")
            password = getpass.getpass("Enter your Bandcamp password: ")
        driver = login_to_bandcamp_selenium(email, password, driver)

    if driver:
        load_all_purchases(driver)
        purchases = scrape_purchases_selenium(driver)
        if purchases:
            save_purchases_to_csv(purchases)
        else:
            print("No purchases found.")
        driver.quit()
    else:
        print("Script terminated due to login failure.")