Created
February 4, 2023 19:24
-
-
Save aronj/a803978266f7571286ec5ea4980bca2e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
import re | |
import time | |
import pandas as pd | |
from selenium import webdriver | |
from selenium.common import NoSuchElementException | |
from selenium.webdriver import ActionChains | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.by import By | |
from tqdm import tqdm | |
from user_agent import generate_user_agent | |
'''
Install:
    pip install selenium pandas tqdm user_agent
Run:
    IMDB_EMAIL=abc IMDB_PASSWORD=abc python filmtipset2imdb.py
'''
def element_exists(**kwargs):
    """Report whether the module-level ``driver`` can locate an element.

    The keyword arguments are forwarded unchanged to
    ``driver.find_element`` (callers in this script pass ``by=`` and
    ``value=``).

    Returns:
        ``True`` when the lookup succeeds, ``False`` when it raises
        ``NoSuchElementException``. Any other exception propagates.
    """
    try:
        driver.find_element(**kwargs)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
# Start Chrome with the user-agent string produced by generate_user_agent().
opts = Options()
opts.add_argument("user-agent=" + generate_user_agent())
driver = webdriver.Chrome(options=opts)

# Walk through the IMDb sign-in flow. Credentials are read from the
# IMDB_EMAIL / IMDB_PASSWORD environment variables (KeyError if unset).
driver.get('https://imdb.com')
for clickable_xpath in ('//*[text()="Sign In"]',
                        '//*[text()="Sign in with IMDb"]'):
    driver.find_element(by=By.XPATH, value=clickable_xpath).click()
for field_name, env_var in (('email', 'IMDB_EMAIL'),
                            ('password', 'IMDB_PASSWORD')):
    driver.find_element(by=By.NAME, value=field_name).send_keys(os.environ[env_var])
driver.find_element(by=By.ID, value='signInSubmit').click()
# Register tqdm's pandas integration.
tqdm.pandas()

# Pick the 'ft_betyg_*.csv' file in the working directory with the highest
# os.path.getctime value. Fail with a clear message instead of the opaque
# "max() arg is an empty sequence" when no export is present.
csv_candidates = glob.glob('ft_betyg_*.csv')
if not csv_candidates:
    raise FileNotFoundError(
        "No 'ft_betyg_*.csv' export found in the current directory")
csv_path = max(csv_candidates, key=os.path.getctime)

# Each data line starts with a 10-character date (digits and dashes) followed
# by a comma; turn that comma into ';' so the whole file can be parsed with a
# single separator. The file is rewritten in place; a second run is a no-op
# because the pattern no longer matches.
# UTF-8 is used explicitly to agree with pd.read_csv's default encoding, and
# newline='' keeps the original line endings untouched.
leading_date_comma = re.compile(r'(^[0-9-]{10}),')
with open(csv_path, 'r', encoding='utf-8', newline='') as f:
    lines = [leading_date_comma.sub(r'\1;', line) for line in f]
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
    f.writelines(lines)

df = pd.read_csv(csv_path, sep=';', header=0)
# Rate every title of the export on IMDb, one page load per row.
# Relies on the module-level `df`, `driver` and `element_exists`.
t = tqdm(df.itertuples(), total=len(df))
skipped = 0
try:
    for row in t:
        # pandas parses a numeric column containing blanks as float, which
        # would yield URLs like "tt123456.0" and labels like "Rate 9.0".
        # Go through int() and skip rows whose values are missing/unusable.
        try:
            imdb_id = int(row.IMDB)
            rating = int(row.Score * 2 - 1)
        except (TypeError, ValueError):
            skipped += 1
            t.set_postfix({'skipped': skipped})
            continue

        url = f'https://www.imdb.com/title/tt{str(imdb_id).zfill(7)}'
        t.set_description(url)
        t.set_postfix({'skipped': skipped})
        driver.get(url)
        time.sleep(1.5)  # fixed pause after navigation before querying the page

        # NOTE(review): presumably the bordered star icon is only present on
        # titles that can still be rated - confirm against the live page.
        if not element_exists(by=By.ID, value='iconContext-star-border'):
            skipped += 1
            continue

        try:
            driver.find_element(by=By.XPATH, value='//*[text()="Rate"]').click()
            # NOTE(review): contains() also matches "Rate 10" when rating is 1;
            # find_element returns the first match in document order.
            element = driver.find_element(
                by=By.XPATH,
                value=f'//*[contains(@aria-label, "Rate {rating}")]')
            ActionChains(driver).move_to_element(element).click().perform()
            driver.find_element(
                by=By.XPATH, value='//button[./*[text()="Rate"]]').click()
        except Exception:
            # Best effort: record the title that could not be rated and go on.
            with open('test.txt', 'a') as f:
                f.write(f'{url} {rating}\n')

    # Give the last submission the same pause the loop gives every other one.
    time.sleep(1.5)
finally:
    # End the WebDriver session so no browser/driver process is left behind.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment