aronj · February 4, 2023 19:24
diff --git a/filmtipset2imdb.py b/filmtipset2imdb.py
 import glob
 import os
 import re
 import time

 import pandas as pd
 from selenium import webdriver
 from selenium.common import NoSuchElementException
 from selenium.webdriver import ActionChains
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.by import By
 from tqdm import tqdm
 from user_agent import generate_user_agent

 '''
 Install:
 pip install selenium pandas tqdm user_agent

 Run:
 IMDB_EMAIL=abc IMDB_PASSWORD=abc python filmtipset2imdb.py
 '''


 def element_exists(**kwargs):
    '''Report whether ``driver.find_element(**kwargs)`` locates an element.

    The keyword arguments are forwarded unchanged to the module-level
    ``driver``. Returns ``False`` when the lookup raises
    ``NoSuchElementException`` and ``True`` otherwise; any other exception
    propagates to the caller.
    '''
    try:
        driver.find_element(**kwargs)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found


 opts = Options()
 # Pass Chrome a user-agent string produced by generate_user_agent().
 opts.add_argument("user-agent=" + generate_user_agent())

 driver = webdriver.Chrome(options=opts)

 # Everything after the browser is started runs under try/finally so the
 # browser session is always shut down, including when a step fails.
 try:
    # Sign in through the "Sign in with IMDb" form using the credentials in
    # the IMDB_EMAIL / IMDB_PASSWORD environment variables.
    driver.get('https://imdb.com')

    driver.find_element(by=By.XPATH, value='//*[text()="Sign In"]').click()
    driver.find_element(by=By.XPATH, value='//*[text()="Sign in with IMDb"]').click()
    driver.find_element(by=By.NAME, value='email').send_keys(os.environ['IMDB_EMAIL'])
    driver.find_element(by=By.NAME, value='password').send_keys(os.environ['IMDB_PASSWORD'])
    driver.find_element(by=By.ID, value='signInSubmit').click()
    tqdm.pandas()

    # Pick the matching export with the greatest os.path.getctime value.
    csv_path = max(glob.iglob('ft_betyg_*.csv'), key=os.path.getctime, default=None)
    if csv_path is None:
        # Without this check max() fails with an unhelpful ValueError.
        raise SystemExit('No ft_betyg_*.csv file found in the current directory')

    # Rewrite the export in place: a comma directly after the leading
    # 10 characters of digits/hyphens (presumably a date -- confirm) becomes
    # ';', the separator handed to read_csv below.
    with open(csv_path, 'r') as f:
        lines = [re.sub(r'(^[0-9-]{10}),', r'\1;', line) for line in f]
    with open(csv_path, 'w') as f:
        f.writelines(lines)

    df = pd.read_csv(csv_path, sep=';', header=0)
    t = tqdm(df.itertuples(), total=len(df))
    skipped = 0
    for row in t:
        # Title ids are zero-padded to at least 7 characters.
        url = f'https://www.imdb.com/title/tt{str(row.IMDB).zfill(7)}'
        t.set_description(url)
        t.set_postfix({'skipped': skipped})
        driver.get(url)

        # Fixed pause after navigation before looking for page elements.
        time.sleep(1.5)

        # Pages lacking the element with id 'iconContext-star-border' are
        # counted as skipped (presumably already rated or not rateable --
        # confirm against the live page).
        if not element_exists(by=By.ID, value='iconContext-star-border'):
            skipped += 1
            continue

        # Map the Score column to 2 * score - 1 (1 -> 1, 2 -> 3, 3 -> 5, ...).
        rating = row.Score * 2 - 1
        try:
            driver.find_element(by=By.XPATH, value='//*[text()="Rate"]').click()
            # NOTE: contains() is a substring test, so "Rate 1" would also
            # match a label containing "Rate 10"; find_element returns the
            # first match in the document.
            element = driver.find_element(by=By.XPATH, value=f'//*[contains(@aria-label, "Rate {rating}")]')

            actions = ActionChains(driver)
            actions.move_to_element(element).click().perform()

            driver.find_element(by=By.XPATH, value='//button[./*[text()="Rate"]]').click()
        except Exception as e:
            # Best effort: keep processing the remaining titles, record this
            # one for a manual retry, and report why it failed instead of
            # silently discarding the error.
            with open('test.txt', 'a') as f:
                f.write(f'{url} {rating}\n')
            tqdm.write(f'Failed to rate {url}: {e!r}')
 finally:
    # Close the browser windows and end the driver session.
    driver.quit()
	import glob
	import os
	import re
	import time

	import pandas as pd
	from selenium import webdriver
	from selenium.common import NoSuchElementException
	from selenium.webdriver import ActionChains
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.common.by import By
	from tqdm import tqdm
	from user_agent import generate_user_agent

	'''
	Install:
	pip install selenium pandas tqdm user_agent

	Run:
	IMDB_EMAIL=abc IMDB_PASSWORD=abc python filmtipset2imdb.py
	'''


	def element_exists(**kwargs):
	    '''Report whether ``driver.find_element(**kwargs)`` locates an element.

	    The keyword arguments are forwarded unchanged to the module-level
	    ``driver``. Returns ``False`` when the lookup raises
	    ``NoSuchElementException`` and ``True`` otherwise; any other exception
	    propagates to the caller.
	    '''
	    try:
	        driver.find_element(**kwargs)
	    except NoSuchElementException:
	        found = False
	    else:
	        found = True
	    return found


	opts = Options()
	# Pass Chrome a user-agent string produced by generate_user_agent().
	opts.add_argument("user-agent=" + generate_user_agent())

	driver = webdriver.Chrome(options=opts)

	# Everything after the browser is started runs under try/finally so the
	# browser session is always shut down, including when a step fails.
	try:
	    # Sign in through the "Sign in with IMDb" form using the credentials in
	    # the IMDB_EMAIL / IMDB_PASSWORD environment variables.
	    driver.get('https://imdb.com')

	    driver.find_element(by=By.XPATH, value='//*[text()="Sign In"]').click()
	    driver.find_element(by=By.XPATH, value='//*[text()="Sign in with IMDb"]').click()
	    driver.find_element(by=By.NAME, value='email').send_keys(os.environ['IMDB_EMAIL'])
	    driver.find_element(by=By.NAME, value='password').send_keys(os.environ['IMDB_PASSWORD'])
	    driver.find_element(by=By.ID, value='signInSubmit').click()
	    tqdm.pandas()

	    # Pick the matching export with the greatest os.path.getctime value.
	    csv_path = max(glob.iglob('ft_betyg_*.csv'), key=os.path.getctime, default=None)
	    if csv_path is None:
	        # Without this check max() fails with an unhelpful ValueError.
	        raise SystemExit('No ft_betyg_*.csv file found in the current directory')

	    # Rewrite the export in place: a comma directly after the leading
	    # 10 characters of digits/hyphens (presumably a date -- confirm) becomes
	    # ';', the separator handed to read_csv below.
	    with open(csv_path, 'r') as f:
	        lines = [re.sub(r'(^[0-9-]{10}),', r'\1;', line) for line in f]
	    with open(csv_path, 'w') as f:
	        f.writelines(lines)

	    df = pd.read_csv(csv_path, sep=';', header=0)
	    t = tqdm(df.itertuples(), total=len(df))
	    skipped = 0
	    for row in t:
	        # Title ids are zero-padded to at least 7 characters.
	        url = f'https://www.imdb.com/title/tt{str(row.IMDB).zfill(7)}'
	        t.set_description(url)
	        t.set_postfix({'skipped': skipped})
	        driver.get(url)

	        # Fixed pause after navigation before looking for page elements.
	        time.sleep(1.5)

	        # Pages lacking the element with id 'iconContext-star-border' are
	        # counted as skipped (presumably already rated or not rateable --
	        # confirm against the live page).
	        if not element_exists(by=By.ID, value='iconContext-star-border'):
	            skipped += 1
	            continue

	        # Map the Score column to 2 * score - 1 (1 -> 1, 2 -> 3, 3 -> 5, ...).
	        rating = row.Score * 2 - 1
	        try:
	            driver.find_element(by=By.XPATH, value='//*[text()="Rate"]').click()
	            # NOTE: contains() is a substring test, so "Rate 1" would also
	            # match a label containing "Rate 10"; find_element returns the
	            # first match in the document.
	            element = driver.find_element(by=By.XPATH, value=f'//*[contains(@aria-label, "Rate {rating}")]')

	            actions = ActionChains(driver)
	            actions.move_to_element(element).click().perform()

	            driver.find_element(by=By.XPATH, value='//button[./*[text()="Rate"]]').click()
	        except Exception as e:
	            # Best effort: keep processing the remaining titles, record this
	            # one for a manual retry, and report why it failed instead of
	            # silently discarding the error.
	            with open('test.txt', 'a') as f:
	                f.write(f'{url} {rating}\n')
	            tqdm.write(f'Failed to rate {url}: {e!r}')
	finally:
	    # Close the browser windows and end the driver session.
	    driver.quit()