Skip to content

Instantly share code, notes, and snippets.

@elioduran60
Forked from eduardogpg/scraper.py
Created April 28, 2023 23:44
Show Gist options
  • Save elioduran60/475790ae471b834de8bd574f88271a73 to your computer and use it in GitHub Desktop.
Save elioduran60/475790ae471b834de8bd574f88271a73 to your computer and use it in GitHub Desktop.
Una clase del BC de Ciencia de datos.
import requests
from bs4 import BeautifulSoup
# IMDb calendar page, requested with region=MX.
URL = 'https://www.imdb.com/calendar/?region=MX'

# Plan for this script:
# 1. Get the HTML markup.
#    - If the HTML file does not exist locally, create it.
#    - If the HTML file exists locally, read its content.
# 2. Extract the information.
# 3. Generate a CSV file.
def get_imdb_content(timeout=10):
    """Download the page at ``URL`` and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as ``str`` when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, or connection error).
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0'
    }

    try:
        response = requests.get(URL, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException:
        # Network failures are reported the same way as a bad status code so
        # callers only have to handle a single "no content" signal.
        return None

    if response.status_code == 200:
        return response.text

    return None
def create_imdb_file_local(content):
    """Write *content* to ``imdb.html`` in the current working directory.

    Caching is best effort: a failure to write is ignored so the caller can
    keep working with the content it already holds in memory.

    Args:
        content: HTML text to store. Anything that is not a ``str`` (for
            example ``None`` after a failed download) is skipped, so the
            file is never opened and truncated for nothing.
    """
    if not isinstance(content, str):
        return

    try:
        # Explicit encoding keeps the cache independent of the OS locale.
        with open('imdb.html', 'w', encoding='utf-8') as file:
            file.write(content)
    except OSError:
        # Unwritable directory, full disk, etc. -- the cache is optional.
        pass
def get_imdb_file_local():
    """Read the cached page from ``imdb.html`` in the working directory.

    Returns:
        The file content as ``str``, or ``None`` when the file is missing,
        cannot be read, or is not valid UTF-8.
    """
    try:
        with open('imdb.html', 'r', encoding='utf-8') as file:
            return file.read()
    except (OSError, UnicodeDecodeError):
        # A missing or unreadable cache is an expected situation, not an
        # error: the caller falls back to downloading the page.
        return None
def get_local_imdb_content():
    """Return the page HTML, preferring the local cache over the network.

    The cached file is used when it exists and is non-empty. Otherwise the
    page is downloaded and, if the download succeeded, stored for next time.

    Returns:
        The HTML text, or ``None`` when there is no usable cache and the
        download failed.
    """
    content = get_imdb_file_local()
    if content:
        return content

    content = get_imdb_content()
    # Only cache a successful download; writing a failed (None) result would
    # just create or truncate the cache file.
    if content is not None:
        create_imdb_file_local(content)

    return content
def main():
    """Script entry point: print the cached-or-downloaded page content."""
    print(get_local_imdb_content())
# Run main() only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment