Skip to content

Instantly share code, notes, and snippets.

@aronj
Created February 4, 2023 19:24
Show Gist options
  • Save aronj/a803978266f7571286ec5ea4980bca2e to your computer and use it in GitHub Desktop.
Save aronj/a803978266f7571286ec5ea4980bca2e to your computer and use it in GitHub Desktop.
import glob
import os
import re
import time
import pandas as pd
from selenium import webdriver
from selenium.common import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from tqdm import tqdm
from user_agent import generate_user_agent
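'''
Install:
pip install selenium pandas tqdm user_agent
Run (from the directory that contains the ft_betyg_*.csv export; the newest
matching file is used and may be rewritten in place so that the first ','
after the leading date becomes ';'):
IMDB_EMAIL=abc IMDB_PASSWORD=abc python filmtipset2imdb.py
'''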
def element_exists(**kwargs):
    """Report whether the module-level ``driver`` can locate an element.

    Args:
        **kwargs: Forwarded unchanged to ``driver.find_element`` (the script
            calls this with ``by=...`` and ``value=...``).

    Returns:
        ``False`` when ``find_element`` raises ``NoSuchElementException``,
        ``True`` otherwise.
    """
    try:
        driver.find_element(**kwargs)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
# Read the credentials before launching Chrome so that a missing environment
# variable fails immediately (KeyError) rather than after a browser window has
# already been opened and driven part-way through the sign-in flow.
imdb_email = os.environ['IMDB_EMAIL']
imdb_password = os.environ['IMDB_PASSWORD']

# Start Chrome with a generated user-agent string.
opts = Options()
opts.add_argument("user-agent=" + generate_user_agent())
driver = webdriver.Chrome(options=opts)

# Sign in: open the site, follow "Sign In" -> "Sign in with IMDb", then fill in
# and submit the e-mail/password form.
driver.get('https://imdb.com')
driver.find_element(by=By.XPATH, value='//*[text()="Sign In"]').click()
driver.find_element(by=By.XPATH, value='//*[text()="Sign in with IMDb"]').click()
driver.find_element(by=By.NAME, value='email').send_keys(imdb_email)
driver.find_element(by=By.NAME, value='password').send_keys(imdb_password)
driver.find_element(by=By.ID, value='signInSubmit').click()
# Register tqdm's progress helpers on pandas objects.
tqdm.pandas()

# Pick the matching export with the greatest os.path.getctime value.
# ``default=None`` lets us report a missing export clearly instead of letting
# max() fail with an opaque "empty sequence" ValueError.
csv_path = max(glob.iglob('ft_betyg_*.csv'), key=os.path.getctime, default=None)
if csv_path is None:
    raise FileNotFoundError(
        "No 'ft_betyg_*.csv' export found in the current directory"
    )

# Each line that starts with 10 digit/hyphen characters followed by a comma
# (presumably a YYYY-MM-DD date) gets that comma replaced by ';', so the file
# can afterwards be parsed with ';' as its only separator.
with open(csv_path, 'r') as f:
    lines = f.readlines()
leading_date_comma = re.compile(r'(^[0-9-]{10}),')
normalized = [leading_date_comma.sub(r'\1;', line) for line in lines]

# Only touch the file on disk when the normalisation actually changed
# something; an already-normalised export is left as-is.
if normalized != lines:
    with open(csv_path, 'w') as f:
        f.writelines(normalized)
lines = normalized
# Load the export (first line is the header) and walk it row by row with a
# progress bar; the loop reads the ``IMDB`` and ``Score`` columns.
df = pd.read_csv(csv_path, sep=';', header=0)
t = tqdm(df.itertuples(), total=len(df))
skipped = 0
try:
    for row in t:
        # pandas parses an integer column as float as soon as one value is
        # missing, which would yield URLs like ".../tt123456.0" and XPath
        # labels like "Rate 5.0". Force integers, and skip rows whose id or
        # score is missing or non-numeric.
        try:
            imdb_id = int(row.IMDB)
            rating = int(row.Score * 2 - 1)
        except (TypeError, ValueError):
            skipped += 1
            t.set_postfix({'skipped': skipped})
            continue

        # The numeric id is zero-padded to at least 7 digits after "tt".
        url = f'https://www.imdb.com/title/tt{str(imdb_id).zfill(7)}'
        t.set_description(url)
        t.set_postfix({'skipped': skipped})
        driver.get(url)
        # Fixed pause after navigation before probing the page.
        time.sleep(1.5)

        # Without the 'iconContext-star-border' element the title is skipped
        # (presumably it is already rated or cannot be rated - TODO confirm).
        if not element_exists(by=By.ID, value='iconContext-star-border'):
            skipped += 1
            t.set_postfix({'skipped': skipped})
            continue

        try:
            # Open the rating dialog, pick the star whose aria-label contains
            # "Rate <rating>", then confirm with the dialog's "Rate" button.
            # NOTE(review): contains() means "Rate 1" would also match a
            # "Rate 10" label; this relies on find_element returning the
            # first match.
            driver.find_element(by=By.XPATH, value='//*[text()="Rate"]').click()
            element = driver.find_element(by=By.XPATH, value=f'//*[contains(@aria-label, "Rate {rating}")]')
            actions = ActionChains(driver)
            actions.move_to_element(element).click().perform()
            driver.find_element(by=By.XPATH, value='//button[./*[text()="Rate"]]').click()
        except Exception:
            # Best effort: record the title that could not be rated and carry
            # on with the next row instead of aborting the whole run.
            with open('test.txt', 'a') as f:
                f.write(f'{url} {rating}\n')
finally:
    # Always end the browser session, even if the loop is interrupted.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment