Skip to content

Instantly share code, notes, and snippets.

@amenayach
Last active October 30, 2019 08:53
Show Gist options
  • Save amenayach/81330c577047b0b19689568a435063bd to your computer and use it in GitHub Desktop.
Save amenayach/81330c577047b0b19689568a435063bd to your computer and use it in GitHub Desktop.
A simple Python script that retrieves the latest news from the Lebanon Files site (best viewed in Jupyter).
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
#Best used via Jupyter
def download_text(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the body if it looks like HTML.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Without a timeout the request can
            block indefinitely on an unresponsive host.

    Returns:
        The raw response body (``resp.content``) when ``is_good_response``
        accepts the response, otherwise ``None``. ``None`` is also returned,
        after the error has been logged via ``log_error``, when the request
        raises ``RequestException``.
    """
    try:
        # closing() makes sure the streamed response is closed on exit.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """Return True when ``resp`` is a 200 response whose Content-Type mentions HTML.

    Args:
        resp: Response-like object exposing ``status_code`` and a mapping
            ``headers``.

    Returns:
        ``True`` if the status code is 200 and the ``Content-Type`` header
        contains ``html`` (case-insensitive); ``False`` otherwise, including
        when the header is missing or empty.
    """
    # .get() instead of indexing: a response without a Content-Type header
    # must be rejected, not raise KeyError.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
def log_error(e):
    """Report an error by printing ``e`` to standard output.

    Args:
        e: The message (or any printable object) describing the error.
    """
    print(e)
def get_lb(url='http://www.lebanonfiles.com/category/2'):
    """Print the text of each news paragraph found on a Lebanon Files page.

    Args:
        url: Page to download. Defaults to the category page the script
            was originally written for.

    The page is fetched with ``download_text``; if that returns ``None``
    nothing is printed. Otherwise every ``<p>`` that is a direct child of an
    element with class ``content-detail-section`` is printed, stripped of
    surrounding whitespace, one per line.
    """
    response = download_text(url)
    if response is None:
        return

    html = BeautifulSoup(response, 'html.parser')
    for p in html.select('.content-detail-section > p'):
        print(p.text.strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment