Created
March 14, 2020 15:43
-
-
Save RajeshKrSahoo/319c7f0fe5b6749982d89ab4f766f3f4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary libraries
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
def scrap_covid19_data():
    """Download the Worldometer coronavirus page and return it parsed.

    Returns:
        A ``BeautifulSoup`` tree built from the response body with the
        ``html.parser`` backend, or ``None`` when the request fails. Failures
        are printed as ``error: <details>`` rather than raised.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    url = 'https://www.worldometers.info/coronavirus/#countries'
    try:
        r = requests.get(url, headers=header, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page
        # as if it were the data table.
        r.raise_for_status()
    except requests.RequestException as e:
        print(f'error: {e}')
        return None
    # Alternative noted by the original author: the table could also be read
    # directly with pandas, e.g. pd.read_html(r.text)[0].
    return bs(r.text, 'html.parser')
def extract_covid_data():
    """Turn the ``main_table_countries`` HTML table into a DataFrame.

    The page is fetched through ``scrap_covid19_data()``. Column names are the
    stripped texts of the table's ``<th>`` cells (they are also printed), and
    each ``<tr>`` that contains ``<td>`` cells becomes one row of stripped
    cell texts.

    Returns:
        pandas.DataFrame: the table contents. Rows that are entirely empty
        are dropped and remaining missing cells are filled with 0.

    Raises:
        RuntimeError: if the page could not be downloaded or it does not
            contain a table with id ``main_table_countries``.
    """
    soup = scrap_covid19_data()
    if soup is None:
        # scrap_covid19_data() reports failures by returning None.
        raise RuntimeError("COVID-19 page could not be downloaded")

    tables = soup.find_all(name="table", attrs={"id": "main_table_countries"})
    if not tables:
        raise RuntimeError("table 'main_table_countries' not found in page")
    table = tables[0]

    # Header data.
    thead = [th.text.strip() for th in table.find_all('th')]
    print(thead)

    # Row data.
    covid_table = []
    for tr in table.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # Header rows hold only <th> cells; they carry no data.
            continue
        covid_table.append([td.text.strip() for td in cells])

    # No dtype=float here: the cells are text (country names among them), and
    # pandas 2.x raises instead of falling back when the cast cannot be done.
    corona_data = pd.DataFrame(covid_table, columns=thead)
    corona_data = corona_data.replace('None', np.nan).dropna(how='all')
    return corona_data.fillna(0)
# Module-level snapshot of the scraped table, fetched once when this module is
# loaded; search_country() falls back to it when no DataFrame is passed in.
corona_data = extract_covid_data()
def search_country(country=None, df=None):
    """Look up one country's row(s), or refresh the data for all countries.

    Args:
        country: Country name to look for in the ``'Country,Other'`` column.
            Matching ignores letter case and surrounding/repeated whitespace,
            so ``'usa'`` finds ``'USA'``. When ``None``, the table is scraped
            again and returned in full.
        df: DataFrame to search. When ``None`` (the default), the module-level
            ``corona_data`` snapshot is used.

    Returns:
        The freshly scraped DataFrame when ``country`` is ``None``; otherwise
        the matching rows of ``df``, or the string
        ``'The given Country information is not Avaialable'`` when nothing
        matches (a notice is printed as well).
    """
    if country is None:
        print(" Updating for all")
        return extract_covid_data()

    if df is None:
        # Resolved at call time so the function does not freeze whatever
        # corona_data happened to be when the function was defined.
        df = corona_data

    # casefold() rather than capitalize(): capitalize() lowercases the rest of
    # each word, which made all-caps names such as "USA" or "UK" unmatchable.
    wanted = ' '.join(country.split()).casefold()
    names = df['Country,Other'].astype(str).str.casefold()
    matches = names == wanted

    if matches.any():
        return df.loc[matches]

    print("Nope The given Country information is not Avaialable")
    return 'The given Country information is not Avaialable'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment