Skip to content

Instantly share code, notes, and snippets.

@NaxAlpha
Created June 16, 2020 10:34
Show Gist options
  • Save NaxAlpha/2204e4d940c552d72f67dc11b22dd373 to your computer and use it in GitHub Desktop.
Save NaxAlpha/2204e4d940c552d72f67dc11b22dd373 to your computer and use it in GitHub Desktop.
IMDb Selenium Scraper
# Researches a topic on the internet and collects content required for video generation.
from enum import Enum
from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
# Trailer Compiler Template
class SearchTypes(Enum):
    """Sort orders that can be requested from the IMDb title search.

    Each member's value is the raw string this script substitutes into the
    ``sort`` query parameter of the search URL.
    """

    Popular = 'moviemeter'
    Rating = 'user_rating'
    BoxOffice = 'boxoffice_gross_us'
    Length = 'runtime'
    Year = 'year'
# Sort order used for the search request below.
MODE = SearchTypes.Rating

# Load the IMDb "top 250" advanced-search page sorted ascending by MODE and
# collect one dict per listed movie into `info`.
driver = webdriver.Chrome()
info = []
try:
    driver.get(
        'https://www.imdb.com/search/title'
        '?groups=top_250&view=advanced&sort=%s,asc' % MODE.value
    )
    movie_list = driver.find_elements_by_css_selector('.lister-item.mode-advanced')

    for movie_info in movie_list:
        try:
            title_info = movie_info.find_element_by_css_selector('.lister-item-header')
            link_info = title_info.find_element_by_tag_name('a')
            rating_info = movie_info.find_element_by_css_selector('.ratings-imdb-rating')

            _link = link_info.get_attribute('href')
            _year = title_info.find_element_by_css_selector('.lister-item-year').text.strip('()')
            _genre = movie_info.find_element_by_css_selector('.genre').text.split(',')
            _title = link_info.text
            print(_title)

            # Look the "nv" elements up once; the first is read as the vote
            # count and the second as the gross figure.
            nv_values = movie_info.find_elements_by_name('nv')
            _votes = nv_values[0].get_attribute('data-value')
            _gross = nv_values[1].get_attribute('data-value')

            _length = movie_info.find_element_by_css_selector('.runtime').text
            _rating = rating_info.find_element_by_tag_name('strong').text
            # Third '.text-muted' element of the entry -- presumably the plot
            # blurb; verify against the current page markup.
            _trivia = movie_info.find_elements_by_css_selector('.text-muted')[2].text
            _meta_score = movie_info.find_element_by_css_selector('.metascore').text
            _certificate = movie_info.find_element_by_css_selector('.certificate').text

            movie = dict(link=_link, year=_year, genre=_genre, title=_title,
                         votes=_votes, gross=_gross, length=_length, rating=_rating,
                         trivia=_trivia, meta_score=_meta_score, certificate=_certificate)
        except Exception as exc:
            # Best-effort scrape: an entry missing any field (e.g. no gross or
            # metascore) is skipped, but the reason is reported instead of being
            # swallowed. KeyboardInterrupt/SystemExit are not caught here.
            print('Skipping entry: %r' % (exc,))
            continue
        info.append(movie)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even if the scrape above raised.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment