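# NOTE: the lines below are web-page text that was captured together with the
# code; they are kept verbatim as comments so the file remains valid Python.
#
# Skip to content
#
# Instantly share code, notes, and snippets.
#
# View angelicadietzel's full-sized avatar
# 🏠
# Working from home
#
# Angelica Dietzel angelicadietzel
#
# 🏠
# Working from home
# View GitHub Profile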
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
# Fetch the IMDb title-search page for the "top_1000" group. The
# Accept-Language header asks the server for US-English content.
url = "https://www.imdb.com/search/title/?groups=top_1000&ref_=adv_prv"
headers = {"Accept-Language": "en-US, en;q=0.5"}
# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait (connect and read) to 30 seconds.
results = requests.get(url, headers=headers, timeout=30)
# Write the table to movies.csv in the current working directory.
# NOTE(review): `movies` is not defined in the visible part of this script;
# presumably it is a DataFrame built from `results` in code not shown here.
movies.to_csv('movies.csv')
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
# Fetch the IMDb title-search page for the "top_1000" group. The
# Accept-Language header asks the server for US-English content.
url = "https://www.imdb.com/search/title/?groups=top_1000&ref_=adv_prv"
headers = {"Accept-Language": "en-US, en;q=0.5"}
# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait (connect and read) to 30 seconds.
results = requests.get(url, headers=headers, timeout=30)
# Convert the text columns of `movies` to numeric dtypes, printing the dtypes
# before and after the first group of conversions.
# NOTE(review): `movies` is not defined in the visible lines; presumably it is
# a pandas DataFrame built earlier in the script.
print(movies)
print(movies.dtypes)

# Drop the leading '$' and trailing 'M' from the gross figures, then parse
# them as numbers. errors='coerce' turns anything unparsable into NaN. The
# vectorised .str methods pass missing values through instead of raising.
movies['us_grossMillions'] = pd.to_numeric(
    movies['us_grossMillions'].str.lstrip('$').str.rstrip('M'),
    errors='coerce',
)

# Remove thousands separators (a literal comma, not a regex) before casting.
movies['votes'] = movies['votes'].str.replace(',', '', regex=False).astype(int)
movies['metascore'] = movies['metascore'].astype(int)
print(movies.dtypes)

# Keep only the first run of digits in each value. The pattern is a raw
# string so that \d is not treated as an (invalid) string escape sequence,
# and expand=False makes extract return a Series rather than a one-column
# DataFrame.
# NOTE(review): astype(int) raises if any row contains no digits (NaN).
movies['timeMin'] = movies['timeMin'].str.extract(r'(\d+)', expand=False).astype(int)
movies['year'] = movies['year'].str.extract(r'(\d+)', expand=False).astype(int)