Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save audhiaprilliant/e7f430437fe4217a77e4851f0228bbe5 to your computer and use it in GitHub Desktop.

Select an option

Save audhiaprilliant/e7f430437fe4217a77e4851f0228bbe5 to your computer and use it in GitHub Desktop.
Apache Airflow as Job Orchestration
# Function to get the daily data for each province
def get_daily_summary_provinces(**kwargs):
    """Scrape the per-province summary rows into a DataFrame.

    The page is fetched with ``get_url()`` (presumably a BeautifulSoup
    document -- it must support ``find_all``/``find``) and the timestamp
    comes from ``get_current_date()``, which is unpacked as ``(date, time)``.

    Every ``div.covid__row`` contributes one row. A row that lacks the
    province cell, lacks one of the three counter spans, or whose counter
    text contains no digits is skipped with a warning instead of aborting
    the whole scrape.

    Args:
        **kwargs: Accepted and ignored (presumably the Airflow task
            context -- confirm against the DAG definition).

    Returns:
        pandas.DataFrame with columns ``date``, ``time``, ``provinces``,
        ``confirmed``, ``deaths`` and ``recovered``. The three counters are
        digit strings exactly as matched on the page, not integers.
    """
    # Function-scope import: the file's import block is not visible from here.
    import logging

    logger = logging.getLogger(__name__)

    soup = get_url()
    date, time = get_current_date()

    # Regular expression pattern.
    # NOTE(review): only the first run of digits is kept, so a value rendered
    # with thousands separators (e.g. "1.234") would be cut to "1" -- confirm
    # how the site formats large counts.
    pattern_prov = re.compile(r'\d+')

    def _first_number(row, css_class):
        """Return the first digit run of the row's matching span, or None."""
        node = row.find('span', class_=css_class)
        if node is None:
            return None
        match = pattern_prov.search(node.text)
        return match.group(0) if match else None

    provinsi = []
    terkonfirmasi_prov = []
    meninggal_prov = []
    sembuh_prov = []
    for elem in soup.find_all('div', class_='covid__row'):
        provinsi_elem = elem.find('div', class_='covid__prov')
        terkonfirmasi = _first_number(elem, '-odp')
        meninggal = _first_number(elem, '-gone')
        sembuh = _first_number(elem, '-health')

        # Validate the whole row before appending anything so the four
        # lists always stay the same length.
        if provinsi_elem is None or None in (terkonfirmasi, meninggal, sembuh):
            logger.warning('Skipping incomplete province row: %r', elem)
            continue

        provinsi.append(provinsi_elem.text)
        terkonfirmasi_prov.append(terkonfirmasi)
        meninggal_prov.append(meninggal)
        sembuh_prov.append(sembuh)

    # Create dataframe
    n_rows = len(provinsi)
    dic_data = {
        'date': [date] * n_rows,
        'time': [time] * n_rows,
        'provinces': provinsi,
        'confirmed': terkonfirmasi_prov,
        'deaths': meninggal_prov,
        'recovered': sembuh_prov,
    }
    df = pd.DataFrame(data=dic_data)
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment