RajeshKrSahoo · March 14, 2020 15:43
diff --git a/scrapping.py b/scrapping.py
 # Import necessary libraries
 import pandas as pd
 import numpy as np
 import requests
 from bs4 import BeautifulSoup as bs
 from datetime import datetime

 def scrap_covid19_data():
     """Download the worldometers coronavirus page and parse it.

     Returns:
         The page as a ``BeautifulSoup`` tree (``html.parser`` backend), or
         ``None`` when the request fails: connection error, timeout, or a
         4xx/5xx response. The failure is printed, not raised, so callers
         have to check for ``None``.
     """
     header = {
         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
         "X-Requested-With": "XMLHttpRequest",
     }
     url = 'https://www.worldometers.info/coronavirus/#countries'

     try:
         r = requests.get(url, headers=header, timeout=10)
         # Without this check an HTTP error page would be parsed as data.
         r.raise_for_status()
     except requests.RequestException as e:
         print(f'error: {e}')
         return None

     # Alternative kept from the original notes: pd.read_html(r.text)[0]
     # reads the first table of the page straight into a DataFrame.
     return bs(r.text, 'html.parser')
        
        

 def _as_float_if_possible(column):
     """Return ``column`` cast to float, or unchanged if any value cannot be cast."""
     try:
         return column.astype(float)
     except (ValueError, TypeError):
         return column


 def extract_covid_data():
     """Build a DataFrame from the country table of the scraped page.

     Calls ``scrap_covid19_data()``, takes the first table whose id is
     ``main_table_countries``, uses the stripped text of its ``<th>`` cells as
     column names and the stripped text of the ``<td>`` cells of every
     ``<tr>`` as row values. The column names are printed.

     Returns:
         pandas.DataFrame: the table rows that have at least one non-missing
         value, with the remaining missing cells filled with 0. A column is
         float when every one of its values can be cast to float; otherwise
         it keeps the scraped strings.

     Raises:
         RuntimeError: if the page could not be downloaded or does not
             contain the expected table.
     """
     soup = scrap_covid19_data()
     if soup is None:
         raise RuntimeError('the COVID-19 page could not be downloaded')

     table = soup.find_all(name="table", attrs={"id": "main_table_countries"})
     if not table:
         raise RuntimeError('table "main_table_countries" was not found in the page')

     # Header cells give the column names.
     thead = [th.text.strip() for th in table[0].find_all('th')]
     print(thead)

     # A <tr> without <td> cells (e.g. the header row) yields an empty list;
     # pandas pads it with missing values and dropna() removes it below.
     covid_table = [
         [td.text.strip() for td in tr.find_all('td')]
         for tr in table[0].find_all('tr')
     ]

     corona_data = pd.DataFrame(covid_table, columns=thead)
     # Best-effort numeric conversion, one column at a time. Passing
     # dtype=float to the constructor raises on recent pandas as soon as a
     # single cell (such as a country name) is not numeric.
     corona_data = corona_data.apply(_as_float_if_possible)
     corona_data = corona_data.replace('None', np.nan).dropna(how='all')
     corona_data.fillna(0, inplace=True)

     return corona_data
    
  

 # Scrape the table once when the module is imported; this snapshot is the
 # value bound to the default ``df`` argument of search_country() below.
 corona_data=extract_covid_data()

 def search_country(country=None, df=corona_data):
     """Return the rows for one country, or a freshly scraped full table.

     Args:
         country: Name to look up in the ``Country,Other`` column. Letter
             case and repeated or surrounding whitespace are ignored.
             ``None`` (the default) scrapes again and returns everything.
         df: Table to search. The default is the module-level
             ``corona_data`` as it was when this function was defined, not
             a fresh download.

     Returns:
         The matching rows of ``df`` as a DataFrame; the newly scraped
         DataFrame when ``country`` is ``None``; or a message string when
         no row matches.
     """
     if country is None:
         print(" Updating for all")
         return extract_covid_data()

     # Compare case-insensitively. Title-casing the query with
     # str.capitalize() lowercases every letter after the first, so a query
     # like "USA" became "Usa" and could never match an all-caps name.
     wanted = ' '.join(country.split()).casefold()
     names = df['Country,Other'].astype(str).str.casefold()
     covid_outbreak = df.loc[names == wanted]

     if covid_outbreak.empty:
         print("Nope The given Country information is not Avaialable")
         return 'The given Country information is not Avaialable'

     return covid_outbreak
	# Import necessary libraries
	import pandas as pd
	import numpy as np
	import requests
	from bs4 import BeautifulSoup as bs
	from datetime import datetime

	def scrap_covid19_data():
	    """Download the worldometers coronavirus page and parse it.

	    Returns:
	        The page as a ``BeautifulSoup`` tree (``html.parser`` backend), or
	        ``None`` when the request fails: connection error, timeout, or a
	        4xx/5xx response. The failure is printed, not raised, so callers
	        have to check for ``None``.
	    """
	    header = {
	        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
	        "X-Requested-With": "XMLHttpRequest",
	    }
	    url = 'https://www.worldometers.info/coronavirus/#countries'

	    try:
	        r = requests.get(url, headers=header, timeout=10)
	        # Without this check an HTTP error page would be parsed as data.
	        r.raise_for_status()
	    except requests.RequestException as e:
	        print(f'error: {e}')
	        return None

	    # Alternative kept from the original notes: pd.read_html(r.text)[0]
	    # reads the first table of the page straight into a DataFrame.
	    return bs(r.text, 'html.parser')



	def _as_float_if_possible(column):
	    """Return ``column`` cast to float, or unchanged if any value cannot be cast."""
	    try:
	        return column.astype(float)
	    except (ValueError, TypeError):
	        return column


	def extract_covid_data():
	    """Build a DataFrame from the country table of the scraped page.

	    Calls ``scrap_covid19_data()``, takes the first table whose id is
	    ``main_table_countries``, uses the stripped text of its ``<th>`` cells as
	    column names and the stripped text of the ``<td>`` cells of every
	    ``<tr>`` as row values. The column names are printed.

	    Returns:
	        pandas.DataFrame: the table rows that have at least one non-missing
	        value, with the remaining missing cells filled with 0. A column is
	        float when every one of its values can be cast to float; otherwise
	        it keeps the scraped strings.

	    Raises:
	        RuntimeError: if the page could not be downloaded or does not
	            contain the expected table.
	    """
	    soup = scrap_covid19_data()
	    if soup is None:
	        raise RuntimeError('the COVID-19 page could not be downloaded')

	    table = soup.find_all(name="table", attrs={"id": "main_table_countries"})
	    if not table:
	        raise RuntimeError('table "main_table_countries" was not found in the page')

	    # Header cells give the column names.
	    thead = [th.text.strip() for th in table[0].find_all('th')]
	    print(thead)

	    # A <tr> without <td> cells (e.g. the header row) yields an empty list;
	    # pandas pads it with missing values and dropna() removes it below.
	    covid_table = [
	        [td.text.strip() for td in tr.find_all('td')]
	        for tr in table[0].find_all('tr')
	    ]

	    corona_data = pd.DataFrame(covid_table, columns=thead)
	    # Best-effort numeric conversion, one column at a time. Passing
	    # dtype=float to the constructor raises on recent pandas as soon as a
	    # single cell (such as a country name) is not numeric.
	    corona_data = corona_data.apply(_as_float_if_possible)
	    corona_data = corona_data.replace('None', np.nan).dropna(how='all')
	    corona_data.fillna(0, inplace=True)

	    return corona_data



	# Scrape the table once when the module is imported; this snapshot is the
	# value bound to the default ``df`` argument of search_country() below.
	corona_data=extract_covid_data()

	def search_country(country=None, df=corona_data):
	    """Return the rows for one country, or a freshly scraped full table.

	    Args:
	        country: Name to look up in the ``Country,Other`` column. Letter
	            case and repeated or surrounding whitespace are ignored.
	            ``None`` (the default) scrapes again and returns everything.
	        df: Table to search. The default is the module-level
	            ``corona_data`` as it was when this function was defined, not
	            a fresh download.

	    Returns:
	        The matching rows of ``df`` as a DataFrame; the newly scraped
	        DataFrame when ``country`` is ``None``; or a message string when
	        no row matches.
	    """
	    if country is None:
	        print(" Updating for all")
	        return extract_covid_data()

	    # Compare case-insensitively. Title-casing the query with
	    # str.capitalize() lowercases every letter after the first, so a query
	    # like "USA" became "Usa" and could never match an all-caps name.
	    wanted = ' '.join(country.split()).casefold()
	    names = df['Country,Other'].astype(str).str.casefold()
	    covid_outbreak = df.loc[names == wanted]

	    if covid_outbreak.empty:
	        print("Nope The given Country information is not Avaialable")
	        return 'The given Country information is not Avaialable'

	    return covid_outbreak