Created
October 31, 2021 13:37
-
-
Save parnexcodes/e9b437f5cb5c09afa235ed8ea0cc68ff to your computer and use it in GitHub Desktop.
Scraping the IMDb top chart (imdb.com/chart/top)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import final | |
import requests | |
import pprint | |
from bs4 import BeautifulSoup | |
def get_recent():
    """Scrape the IMDb top chart, pretty-print the entries and return them.

    Downloads https://www.imdb.com/chart/top/, walks every ``<tr>`` inside
    each ``<tbody class="lister-list">`` and builds one dict per row with the
    keys ``rank``, ``poster``, ``link``, ``id``, ``title``, ``release_year``
    and ``rating``. All values are strings taken straight from the markup.

    Returns:
        list[dict]: the scraped rows, in page order. The same list is also
        pretty-printed to stdout, which is what the function did before it
        returned anything useful (it used to return ``None``).

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.imdb.com/chart/top/"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    final_data = []
    for table_body in soup.find_all('tbody', {'class': 'lister-list'}):
        for row in table_body.find_all('tr'):
            poster_column = row.find('td', {'class': 'posterColumn'})
            title_column = row.find('td', {'class': 'titleColumn'})
            rating_column = row.find('td', {'class': 'ratingColumn imdbRating'})
            # Skip rows that lack any of the three cells; the old nested
            # loops would have raised NameError or reused the previous row's
            # values here.
            if poster_column is None or title_column is None or rating_column is None:
                continue

            rank = poster_column.find('span', {'name': 'rk'})['data-value']
            poster = poster_column.a.img['src']

            link = title_column.a.get('href')
            title = title_column.a.text
            # The year is rendered as "(1994)"; strip the decoration.
            release_year = title_column.find('span', {'class': 'secondaryInfo'}).text.strip('(,)')

            rating = rating_column.text.strip()

            final_data.append({
                'rank': rank,
                'poster': poster,
                'link': f'https://imdb.com{link}',
                # Assumes href has the form "/title/<id>/": drop the 7-char
                # "/title/" prefix and the trailing slash.
                'id': link[7:-1],
                'title': title,
                'release_year': release_year,
                'rating': rating,
            })

    pprint.pprint(final_data)
    return final_data
# Script entry: fetch the chart and print it when the file is run.
get_recent()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result: https://nekobin.com/tolibirufo