Skip to content

Instantly share code, notes, and snippets.

@RajeshKrSahoo
Created March 14, 2020 15:43
Show Gist options
  • Save RajeshKrSahoo/319c7f0fe5b6749982d89ab4f766f3f4 to your computer and use it in GitHub Desktop.
Save RajeshKrSahoo/319c7f0fe5b6749982d89ab4f766f3f4 to your computer and use it in GitHub Desktop.
# Import necessary libraries
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs
from datetime import datetime
def scrap_covid19_data():
    """Download the Worldometers coronavirus page and return it parsed.

    Returns:
        A BeautifulSoup document built from the response body, or ``None``
        when the request fails. Failures are printed rather than raised, so
        callers must check for ``None``.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    url = 'https://www.worldometers.info/coronavirus/#countries'
    try:
        r = requests.get(url, headers=header, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page
        # as if it contained the statistics table.
        r.raise_for_status()
    except requests.RequestException as e:
        # Only network/HTTP problems are handled here; anything else is a
        # programming error and should surface normally.
        print(f'error: {e}')
        return None
    # NOTE: the original author pointed out that the response could also be
    # handed straight to pandas, e.g. pd.read_html(r.text)[0].
    return bs(r.text, 'html.parser')
def extract_covid_data():
    """Build a DataFrame from the ``main_table_countries`` HTML table.

    The page is fetched with ``scrap_covid19_data``. Column names come from
    the table's ``<th>`` cells and every ``<tr>`` contributes one row made of
    the stripped text of its ``<td>`` cells. Cell values are kept as the text
    scraped from the page; missing values are replaced by ``0``.

    Returns:
        The per-country DataFrame, or an empty DataFrame when the page could
        not be downloaded or does not contain the expected table (a message
        is printed in both cases).
    """
    soup = scrap_covid19_data()
    if soup is None:
        # The download failed; its error has already been printed upstream.
        print('error: COVID-19 page could not be downloaded')
        return pd.DataFrame()

    table = soup.find_all(name="table", attrs={"id": "main_table_countries"})
    if not table:
        print('error: table "main_table_countries" not found in the page')
        return pd.DataFrame()

    # Header data
    thead = [i.text.strip() for i in table[0].find_all('th')]
    print(thead)

    # Row data. Rows without <td> cells (presumably the header row) become
    # empty lists; they end up as all-missing rows and are removed by
    # dropna(how='all') below, which keeps the original row index labels.
    covid_table = [
        [cell.text.strip() for cell in tr.find_all('td')]
        for tr in table[0].find_all('tr')
    ]

    # No dtype is forced: the cells are strings such as country names, which
    # cannot be cast to float, and recent pandas versions raise on such a cast.
    corona_data = pd.DataFrame(covid_table, columns=thead)
    corona_data = corona_data.replace('None', np.nan).dropna(how='all')
    return corona_data.fillna(0)  # .sort_values('Country,Other')
# Snapshot of the scraped table, taken once when the module is loaded;
# search_country uses it as the default value of its ``df`` parameter.
corona_data = extract_covid_data()
def search_country(country=None, df=corona_data):
    """Return the rows of ``df`` that belong to ``country``.

    Args:
        country: Country name to look up. Matching ignores letter case and
            surrounding/repeated whitespace, so ``"usa"`` finds ``"USA"``.
            When ``None``, the data is scraped again and the complete table
            is returned.
        df: DataFrame to search; defaults to the snapshot taken when the
            module was loaded.

    Returns:
        A DataFrame with the matching row(s), the freshly scraped DataFrame
        when ``country`` is ``None``, or the message string
        ``'The given Country information is not Avaialable'`` when nothing
        matches (the message is also printed).
    """
    if country is None:
        print(" Updating for all")
        return extract_covid_data()

    column = 'Country,Other'
    # Compare case-insensitively: capitalising each word of the query would
    # turn "USA" into "Usa" and never match all-caps entries in the table.
    wanted = ' '.join(country.split()).casefold()

    # A frame without the country column simply has nothing to offer.
    if column in df.columns:
        names = df[column].astype(str).str.strip().str.casefold()
        covid_outbreak = df.loc[names == wanted]
        if not covid_outbreak.empty:
            return covid_outbreak

    print("Nope The given Country information is not Avaialable")
    return 'The given Country information is not Avaialable'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment