Created
June 16, 2020 10:34
-
-
Save NaxAlpha/2204e4d940c552d72f67dc11b22dd373 to your computer and use it in GitHub Desktop.
IMDb Selenium Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Researches a topic on the internet and collects content required for video generation. | |
import logging
from enum import Enum

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
| # from selenium.webdriver.common.keys import Keys | |
| # Trailer Compiler Template | |
class SearchTypes(Enum):
    """Sort orders that can be requested from the IMDb title search.

    Each member's value is the string substituted into the ``sort=``
    query parameter of the search URL.
    """

    Popular = 'moviemeter'
    Rating = 'user_rating'
    BoxOffice = 'boxoffice_gross_us'
    Length = 'runtime'
    Year = 'year'
# Sort order used for this run; its value is interpolated into the search URL.
MODE = SearchTypes.Rating


def _scrape_movie(movie_info):
    """Extract one movie's fields from a single search-result list item.

    Args:
        movie_info: WebElement for one ``.lister-item.mode-advanced`` entry.

    Returns:
        dict with the keys ``link``, ``year``, ``genre``, ``title``,
        ``votes``, ``gross``, ``length``, ``rating``, ``trivia``,
        ``meta_score`` and ``certificate``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: a required
            element is missing from the list item.
        IndexError: the item has fewer ``name="nv"`` or ``.text-muted``
            elements than the indexes read below.
    """
    title_info = movie_info.find_element(By.CSS_SELECTOR, '.lister-item-header')
    link_info = title_info.find_element(By.TAG_NAME, 'a')
    rating_info = movie_info.find_element(By.CSS_SELECTOR, '.ratings-imdb-rating')

    link = link_info.get_attribute('href')
    year = title_info.find_element(By.CSS_SELECTOR, '.lister-item-year').text.strip('()')
    genre = movie_info.find_element(By.CSS_SELECTOR, '.genre').text.split(',')
    title = link_info.text
    print(title)  # progress output, one title per visited list item

    # Look the name="nv" elements up once: index 0 is read as the vote count,
    # index 1 as the gross. A missing second entry raises IndexError.
    nv_values = movie_info.find_elements(By.NAME, 'nv')
    votes = nv_values[0].get_attribute('data-value')
    gross = nv_values[1].get_attribute('data-value')

    length = movie_info.find_element(By.CSS_SELECTOR, '.runtime').text
    rating = rating_info.find_element(By.TAG_NAME, 'strong').text
    # The third `.text-muted` element of the item is stored as the trivia text.
    trivia = movie_info.find_elements(By.CSS_SELECTOR, '.text-muted')[2].text
    meta_score = movie_info.find_element(By.CSS_SELECTOR, '.metascore').text
    certificate = movie_info.find_element(By.CSS_SELECTOR, '.certificate').text

    return dict(link=link, year=year, genre=genre, title=title,
                votes=votes, gross=gross, length=length, rating=rating,
                trivia=trivia, meta_score=meta_score, certificate=certificate)


driver = webdriver.Chrome()
info = []
try:
    driver.get('https://www.imdb.com/search/title?groups=top_250&view=advanced&sort=%s,asc' % MODE.value)
    movie_list = driver.find_elements(By.CSS_SELECTOR, '.lister-item.mode-advanced')
    for movie_info in movie_list:
        try:
            info.append(_scrape_movie(movie_info))
        except (WebDriverException, IndexError) as exc:
            # Best effort, as before: an item lacking any field is skipped,
            # but the reason is reported instead of being silently discarded.
            logging.warning('Skipping list item: %s', exc)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when the page load or the scraping fails.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment