Created
March 14, 2020 15:31
-
-
Save RajeshKrSahoo/2eb08a4f8ca6f38db53b57762e6b24ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scrap_covid19_data():
    """Download the Worldometers coronavirus page and return it parsed.

    Returns:
        The page parsed with ``bs(..., 'html.parser')``, or ``None`` when the
        request fails or the server answers with an HTTP error status. In
        that case the error is printed rather than raised.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    url = 'https://www.worldometers.info/coronavirus/#countries'
    try:
        r = requests.get(url, headers=header, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        r.raise_for_status()
    except requests.RequestException as e:
        # Best-effort contract: report the problem and hand back None.
        print(f'error: {e}')
        return None
    # Alternative: pd.read_html(r.text)[0] would parse the first table of the
    # response directly with pandas.
    return bs(r.text, 'html.parser')
def extract_covid_data():
    """Turn the ``main_table_countries`` table of the scraped page into a DataFrame.

    The column names come from the table's ``<th>`` cells and every ``<tr>``
    contributes one row made of the stripped text of its ``<td>`` cells.
    Rows that end up entirely empty are dropped and remaining missing cells
    are filled with 0.

    Returns:
        pandas.DataFrame holding the scraped table; cell values are the
        stripped strings found in the page.

    Raises:
        RuntimeError: if the page could not be downloaded or it does not
            contain a table with id ``main_table_countries``.
    """
    soup = scrap_covid19_data()
    if soup is None:
        # scrap_covid19_data() reports failures by returning None.
        raise RuntimeError("COVID-19 page could not be downloaded")

    tables = soup.find_all(name="table", attrs={"id": "main_table_countries"})
    if not tables:
        raise RuntimeError("table 'main_table_countries' was not found in the page")
    table = tables[0]

    # Header data.
    thead = [th.text.strip() for th in table.find_all('th')]
    print(thead)

    # Row data. A <tr> without <td> cells yields an empty list; pandas pads it
    # with None and dropna(how='all') below removes it again, so the surviving
    # rows keep the same index labels as before.
    covid_table = [
        [td.text.strip() for td in tr.find_all('td')]
        for tr in table.find_all('tr')
    ]

    # No dtype=float here: every cell is scraped text, so the cast cannot
    # succeed and recent pandas versions raise instead of ignoring it.
    corona_data = pd.DataFrame(covid_table, columns=thead)
    corona_data = corona_data.replace('None', np.nan).dropna(how='all')
    return corona_data.fillna(0)  ##.sort_values('Country,Other')
# Scrape once when the module is loaded; this snapshot is bound as the default
# ``df`` argument of search_country() defined below.
corona_data=extract_covid_data()
def search_country(country=None, df=corona_data):
    """Look up the row(s) of one country, or refresh the whole table.

    Args:
        country: Country name to look for. Surrounding/repeated whitespace
            and letter case are ignored, so ``"usa"``, ``"USA"`` and
            ``" Usa "`` all find the same row. ``None`` triggers a fresh
            scrape of the full table.
        df: DataFrame to search; it must have a ``'Country,Other'`` column.
            Defaults to the snapshot taken when the module was loaded.

    Returns:
        * ``country is None``: a freshly scraped DataFrame.
        * country found: the matching rows of ``df`` as a DataFrame.
        * country not found: the string
          ``'The given Country information is not Avaialable'`` (a message
          is also printed).
    """
    if country is None:
        print(" Updating for all")
        return extract_covid_data()

    # Compare case-insensitively: str.capitalize() would turn "USA" into
    # "Usa" and miss names that are written fully in upper case.
    wanted = ' '.join(country.split()).casefold()
    names = df['Country,Other'].astype(str).str.casefold()
    covid_outbreak = df.loc[names == wanted]

    if covid_outbreak.empty:
        print("Nope The given Country information is not Avaialable")
        return 'The given Country information is not Avaialable'
    return covid_outbreak
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment