-
-
Save elioduran60/475790ae471b834de8bd574f88271a73 to your computer and use it in GitHub Desktop.
Una clase del BC de Ciencia de datos.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
URL = 'https://www.imdb.com/calendar/?region=MX' | |
""" | |
1.- Get the HTML markup
- If the HTML file does not exist locally, create it.
- If the HTML file exists locally, read its content.
2.- Extract the information
3.- Generate a CSV file
""" | |
def get_imdb_content(timeout=10):
    """Download the page at ``URL`` and return its body as text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0',
    }

    try:
        response = requests.get(URL, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as a bad status code so the
        # caller only has to handle a single "no content" signal.
        return None

    # Only a plain 200 is treated as success; every other status is a miss.
    if response.status_code == 200:
        return response.text

    return None
def create_imdb_file_local(content):
    """Write ``content`` to the local cache file ``imdb.html``.

    Caching is best-effort: a failure to write is not fatal for the caller,
    so file-system errors are swallowed. Nothing is written when ``content``
    is ``None``, which avoids leaving an empty, truncated cache file behind.

    Args:
        content: HTML text to store, or ``None`` to do nothing.
    """
    if content is None:
        return

    try:
        # Explicit UTF-8 so non-ASCII characters in the page do not depend
        # on the platform's default encoding.
        with open('imdb.html', 'w', encoding='utf-8') as file:
            file.write(content)
    except OSError:
        # Best-effort cache: an unwritable directory or full disk must not
        # stop the program, but unrelated bugs are no longer hidden.
        pass
def get_imdb_file_local():
    """Return the text stored in the local cache file ``imdb.html``.

    Returns:
        The file content as a string, or ``None`` when the file is missing,
        cannot be read, or is not valid UTF-8.
    """
    try:
        with open('imdb.html', 'r', encoding='utf-8') as file:
            return file.read()
    except (OSError, UnicodeDecodeError):
        # A missing or unreadable cache is an expected situation; signal it
        # with None and let the caller decide what to do next.
        return None
def get_local_imdb_content():
    """Return the IMDb HTML, preferring the local cache over the network.

    The cached file is used when it exists and is non-empty. Otherwise the
    page is downloaded and, if the download produced content, stored locally.

    Returns:
        The HTML text, or whatever falsy value ``get_imdb_content`` returned
        when the download did not produce content.
    """
    content = get_imdb_file_local()
    if content:
        return content

    content = get_imdb_content()
    # Only cache a successful download; handing a failed result to the
    # writer would leave an empty cache file behind.
    if content:
        create_imdb_file_local(content)

    return content
def main():
    """Print the IMDb HTML obtained from the local cache or the network."""
    print(get_local_imdb_content())


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment