Audhi Aprilliant (audhiaprilliant)
audhiaprilliant / apache_airflow_covid19_save_summary.py
Created December 13, 2020 09:03
Apache Airflow as Job Orchestration
# Function to save the daily aggregated data
def summary_save_txt(**context):
    # Pull the scraped summary from the upstream task via XCom
    value = context['task_instance'].xcom_pull(task_ids = 'summary_scraping_data')
    with open(dir_path + '/data/covid19/summary_covid19.txt', 'r') as f:
        lines = f.read().splitlines()
    last_line = lines[-1]
    if last_line == value:
        notif = 'Last update: ' + re.findall(r'^(.+?),', last_line)[0]
    else:
        # The gist preview is truncated here; appending the new record is the
        # presumed behavior
        with open(dir_path + '/data/covid19/summary_covid19.txt', 'a+') as ff:
            ff.write('\n' + value)
        notif = 'New record appended'
    return notif
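The value pulled above is whatever the upstream callable returned: a PythonOperator automatically pushes its callable's return value to XCom under its own task_id. A minimal, self-contained sketch of that handoff; the dag id, sample string, and the produce/consume names are hypothetical, only the xcom_pull pattern mirrors the gist:

from datetime import datetime
from airflow import DAG
from airflow.operators.python_operator import PythonOperator

def produce():
    # The return value is pushed to XCom under this task's task_id
    return 'summary line scraped today'

def consume(**context):
    # Pull the value that the 'producer' task returned
    value = context['task_instance'].xcom_pull(task_ids = 'producer')
    print(value)

with DAG('xcom_demo', start_date = datetime(2020, 5, 20),
         schedule_interval = None) as demo_dag:
    producer = PythonOperator(task_id = 'producer', python_callable = produce)
    consumer = PythonOperator(task_id = 'consumer', python_callable = consume,
                              provide_context = True)
    producer >> consumer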
audhiaprilliant / apache_airflow_covid19_get_provinces_data.py
Last active December 13, 2020 09:05
Apache Airflow as Job Orchestration
# Function to get the daily province-level data
def get_daily_summary_provinces(**kwargs):
    soup = get_url()
    date, time = get_current_date()
    # Get the per-province summary
    # Regular expression pattern for extracting the counts
    pattern_prov = re.compile(r'\d+')
    provinsi = []            # province names
    terkonfirmasi_prov = []  # confirmed cases per province
    meninggal_prov = []      # deaths per province
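The preview cuts off before the parsing loop and the return statement. The save task in the next gist calls value['date'].unique() on what it pulls from XCom, which implies this callable returns a pandas DataFrame built from the lists above. A hedged sketch of that final step; the helper name and column names are assumptions:

import pandas as pd

def build_provinces_frame(date, provinsi, terkonfirmasi_prov, meninggal_prov):
    # Hypothetical final step: assemble the parsed lists into a DataFrame,
    # which the PythonOperator then pushes to XCom as its return value
    return pd.DataFrame({
        'date': date,                      # scrape date, broadcast to all rows
        'province': provinsi,
        'confirmed': terkonfirmasi_prov,
        'deaths': meninggal_prov,
    })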
audhiaprilliant / apache_airflow_covid19_save_provinces_data.py
Created December 13, 2020 09:08
Apache Airflow as Job Orchestration
# Function to save the daily province-level data
def provinces_save_csv(**context):
    value = context['task_instance'].xcom_pull(task_ids = 'provinces_scraping_data')
    with open(dir_path + '/data/covid19/daily_update_covid.csv', 'r') as f:
        lines = f.read().splitlines()
    last_line = lines[-1]
    # Compare the date of the last stored row with the newly scraped date
    if re.findall(r'^(.+?),', last_line)[0] == value['date'].unique().tolist()[0]:
        notif = 'Last update: ' + re.findall(r'^(.+?),', last_line)[0]
    else:
        # The gist preview is truncated here; appending the new rows is the
        # presumed behavior
        with open(dir_path + '/data/covid19/daily_update_covid.csv', 'a') as ff:
            value.to_csv(ff, header = False, index = False)
        notif = 'New records appended'
    return notif
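Pulling a whole DataFrame through XCom works only if XCom can serialize it; on Airflow 1.x that means pickling, which is controlled in airflow.cfg (a configuration note, not part of the gist; check the defaults of your version):

[core]
# Allow XCom to pickle arbitrary Python objects such as DataFrames
enable_xcom_pickling = True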
audhiaprilliant / apache_airflow_covid19_define_dag.py
Created December 13, 2020 09:10
Apache Airflow as Job Orchestration
# Set default args
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 5, 20),
    'email': ['[email protected]'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes = 2)
}
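The preview stops at the default arguments; a DAG is then instantiated from them. A minimal sketch; the dag_id, description, and schedule are assumptions, since the gist does not show them:

from airflow import DAG

dag = DAG(
    dag_id = 'covid19_scraping',          # hypothetical id
    default_args = default_args,
    description = 'Daily COVID-19 scraping and recap',
    schedule_interval = '@daily',         # assumed daily schedule
)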
audhiaprilliant / apache_airflow_covid19_create_tasks.py
Created December 13, 2020 09:11
Apache Airflow as Job Orchestration
# Echo task start
task_start = BashOperator(
    task_id = 'start_task',
    bash_command = 'echo start',
    dag = dag
)
# Task 1: scraping daily summary data
summary_scraping = PythonOperator(
    task_id = 'summary_scraping_data',
    python_callable = get_daily_summary,
    # The preview is truncated here; provide_context is needed in Airflow 1.x
    # for callables that accept the context via **kwargs
    provide_context = True,
    dag = dag
)
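The dependency gist further down references summary_save, provinces_scraping, provinces_save, send_email, and send_telegram, which are truncated out of this preview. A hedged sketch of one of them; the task_id is an assumption:

summary_save = PythonOperator(
    task_id = 'summary_save_data',    # hypothetical task_id
    python_callable = summary_save_txt,
    provide_context = True,
    dag = dag
)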
audhiaprilliant / apache_airflow_covid19_setup_dependencies.py
Created December 13, 2020 09:13
Apache Airflow as Job Orchestration
# Set up the dependencies
task_start >> summary_scraping >> summary_save >> provinces_scraping
provinces_scraping >> provinces_save >> send_email >> send_telegram >> finish_start
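The >> operator is Airflow's bitshift dependency syntax: each a >> b marks b as downstream of a, so the two lines above define one linear chain. The first hop is equivalent to:

task_start.set_downstream(summary_scraping)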
audhiaprilliant / apache_airflow_covid19_recap.py
Created December 13, 2020 09:17
Apache Airflow as Job Orchestration
# Modules for airflow
from airflow import DAG
from datetime import timedelta, datetime
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.email_operator import EmailOperator
# Modules for web scraping
import requests
from bs4 import BeautifulSoup
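These operator imports use the Airflow 1.x module paths. For reference (not part of the gist), the same operators live in newer locations on Airflow 2.x:

from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.operators.email import EmailOperator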
audhiaprilliant / data_viz_leaflet.R
Created December 13, 2020 09:20
Data Visualization using Leaflet
# Load the libraries
library(leaflet)
library(leaflet.extras)
library(dplyr)
# Load the data
data.location = read.csv('/path-to-file/Location Data.txt',
                         header = TRUE,
                         sep = ',')
# Create leaflet (the preview is truncated here; a minimal map sketch with
# assumed 'longitude' and 'latitude' column names)
map.location = leaflet(data = data.location) %>%
  addTiles() %>%
  addMarkers(lng = ~longitude, lat = ~latitude)
audhiaprilliant / twitter_data_viz_import_module.R
Created December 13, 2020 09:24
Twitter Data Visualization using ggplot2
# Import libraries
library(ggplot2)
library(lubridate)
# Load the tweet data about Joko Widodo
data.jokowi.df = read.csv(file = 'data-joko-widodo.csv',
                          header = TRUE,
                          sep = ',')
senti.jokowi = read.csv(file = 'sentiment-joko-widodo.csv',
                        header = TRUE,
                        sep = ',')
audhiaprilliant / twitter_data_viz_barplot_tweet_activities_part_one.R
Last active December 13, 2020 09:40
Twitter Data Visualization using ggplot2
# BARPLOT OF TWEETS - JOKO WIDODO
# Parse the timestamps in the Jakarta time zone
data.jokowi.df$created = ymd_hms(data.jokowi.df$created,
                                 tz = 'Asia/Jakarta')
# Another way to make 'Date' and 'Hour' variables
data.jokowi.df$date = date(data.jokowi.df$created)
data.jokowi.df$hour = hour(data.jokowi.df$created)
# Subset the tweets posted on 2019-05-29 and count them per hour
data.jokowi.date1 = subset(x = data.jokowi.df,
                           date == '2019-05-29')
data.hour.date1 = data.frame(table(data.jokowi.date1$hour))
# The preview is truncated here; a minimal bar chart of tweets per hour
# (table() yields the columns Var1 and Freq)
ggplot(data = data.hour.date1, aes(x = Var1, y = Freq)) +
  geom_bar(stat = 'identity') +
  labs(x = 'Hour', y = 'Number of tweets')