Created
October 16, 2024 16:36
-
-
Save CallMeTuesday/2ec5af399bfed2eac3745fa408b81cbc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import getpass
import os
import pickle
import time
from os.path import exists

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
login_url = 'https://bandcamp.com/login' | |
# Update this variable to use your own Bandcamp username | |
username = 'USERNAME' | |
purchase_url = f'https://bandcamp.com/{username}/purchases' | |
cookies_file = 'bandcamp_cookies.pkl' | |
# Function to log in and get session using Selenium | |
def login_to_bandcamp_selenium(email, password, driver): | |
driver.get(login_url) | |
# Enter email and password and submit the form | |
email_field = driver.find_element(By.ID, 'username-field') | |
password_field = driver.find_element(By.ID, 'password-field') | |
email_field.send_keys(email) | |
password_field.send_keys(password) | |
password_field.send_keys(Keys.RETURN) | |
# Wait for login to complete (adjust the sleep time as needed) | |
time.sleep(5) | |
# Check if we successfully logged in by navigating to the purchases page | |
driver.get(purchase_url) | |
if f"{username}/purchases" in driver.current_url: | |
print("Logged in successfully.") | |
# Save cookies to a file | |
with open(cookies_file, 'wb') as f: | |
pickle.dump(driver.get_cookies(), f) | |
return driver | |
else: | |
print("Failed to log in. Please check your credentials or handle any CAPTCHA.") | |
driver.quit() | |
return None | |
# Function to load cookies and start session | |
def load_cookies(driver): | |
if exists(cookies_file): | |
driver.get(login_url) | |
with open(cookies_file, 'rb') as f: | |
cookies = pickle.load(f) | |
for cookie in cookies: | |
driver.add_cookie(cookie) | |
driver.get(purchase_url) | |
time.sleep(5) # Wait for session to load | |
if f"{username}/purchases" in driver.current_url: | |
print("Logged in with cookies successfully.") | |
return True | |
return False | |
# Function to load all purchases by scrolling | |
def load_all_purchases(driver): | |
try: | |
view_all_button = driver.find_element(By.CLASS_NAME, 'view-all-button') | |
view_all_button.click() | |
time.sleep(2) | |
except Exception as e: | |
print(f"View all button not found or could not be clicked: {e}") | |
# Scroll to the bottom of the page to load all purchases | |
last_height = driver.execute_script("return document.body.scrollHeight") | |
while True: | |
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") | |
time.sleep(2) # Wait for new purchases to load | |
new_height = driver.execute_script("return document.body.scrollHeight") | |
if new_height == last_height: | |
break | |
last_height = new_height | |
# Function to scrape purchases | |
def scrape_purchases_selenium(driver): | |
soup = BeautifulSoup(driver.page_source, 'html.parser') | |
purchases = [] | |
# Locate each purchase item | |
for item in soup.find_all('div', class_='purchases-item'): | |
# Extract the title, artist information, and album link | |
title_artist_div = item.find('a', class_='purchases-item-title') | |
if title_artist_div: | |
title = title_artist_div.find('strong').get_text(strip=True) if title_artist_div.find('strong') else 'Unknown' | |
artist = title_artist_div.get_text(strip=True).split('by ', 1)[-1] if 'by ' in title_artist_div.get_text(strip=True) else 'Unknown' | |
album_link = title_artist_div.get('href', None) | |
else: | |
title, artist, album_link = 'Unknown', 'Unknown', None | |
# Extract the purchase date | |
purchase_date_div = item.find('div', class_='purchases-item-date') | |
purchase_date = purchase_date_div.get_text(strip=True) if purchase_date_div else 'Unknown' | |
# Extract the total price | |
price_div = item.find('div', class_='purchases-item-total') | |
price = price_div.find_all('strong')[-1].contents[0].strip() if price_div and price_div.find_all('strong') else 'Unknown' | |
# Add the purchase details to the list | |
purchases.append([title, artist, purchase_date, price, album_link]) | |
return purchases | |
# Write purchases to CSV | |
def save_purchases_to_csv(purchases): | |
with open('bandcamp_purchases.csv', 'w', newline='', encoding='utf-8') as file: | |
writer = csv.writer(file) | |
writer.writerow(['Title', 'Artist', 'Purchase Date', 'Price', 'Link']) | |
for purchase in purchases: | |
title, artist, purchase_date, price, album_link = purchase | |
link = album_link if album_link else 'No Link' | |
writer.writerow([title, artist, purchase_date, price, link]) | |
print("CSV file created successfully.") | |
# Main function | |
if __name__ == "__main__": | |
driver = webdriver.Chrome() # Update with your ChromeDriver path if needed | |
# Try to load cookies | |
if not load_cookies(driver): | |
# If cookies are not available or invalid, ask for credentials and log in manually | |
email = os.getenv("BANDCAMP_EMAIL") | |
password = os.getenv("BANDCAMP_PASSWORD") | |
if not email or not password: | |
email = input("Enter your Bandcamp email: ") | |
import getpass | |
password = getpass.getpass("Enter your Bandcamp password: ") | |
driver = login_to_bandcamp_selenium(email, password, driver) | |
if driver: | |
load_all_purchases(driver) | |
purchases = scrape_purchases_selenium(driver) | |
if purchases: | |
save_purchases_to_csv(purchases) | |
else: | |
print("No purchases found.") | |
driver.quit() | |
else: | |
print("Script terminated due to login failure.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment