exy02 / data_vis_mining_1_extraction_1.py
Last active September 25, 2020 05:41
How to Extract Data from Online Data Visualizations - 1. Data Extraction _1
import requests

# Fetch the compiled JavaScript bundle that embeds the dashboard data
sesh = requests.Session()
response = sesh.get('https://ga-covid19.ondemand.sas.com/static/js/main.js')
response_text = response.text
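Before slicing into the response, a quick optional check that the request succeeded and the bundle is non-empty:
# Optional: raise on HTTP errors and confirm the JavaScript bundle came back non-empty
response.raise_for_status()
print(response.status_code, len(response_text))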
exy02 / data_vis_mining_1_extraction_2.py
Last active September 25, 2020 05:41
How to Extract Data from Online Data Visualizations - 1. Data Extraction _2
# Slice out the serialized JSON array that sits inside the bundle's JSON.parse('[ ... ]') call
json_extract = response_text.split("{t.exports=JSON.parse('[")[1].split("]\')},function(t)")[0]
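As an aside, if the slice between the brackets is already valid JSON, it can in principle be parsed directly rather than cleaned by hand; a sketch, assuming the embedded string contains no JavaScript escape sequences that json.loads would reject:
import json
import pandas as pd

# Alternative route (assumption: the extracted text is valid JSON as-is):
# rebuild the array brackets and let the json module do the parsing
records = json.loads('[' + json_extract + ']')
alt_df = pd.DataFrame(records)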
exy02 / data_vis_mining_2_cleaning_1.py
Last active September 25, 2020 05:40
How to Extract Data from Online Data Visualizations - 2. Data Cleaning _1
# Split the blob into one string per record, strip the leftover braces, and re-wrap each record in brackets
json_list = json_extract.split("},{")
json_list = [sub.replace("{", "") for sub in json_list]
json_list = ["[" + sub + "]" for sub in json_list]
exy02 / data_vis_mining_2_cleaning_2.py
Last active September 25, 2020 05:40
How to Extract Data from Online Data Visualizations - 2. Data Cleaning _2
import pandas as pd

# Load the record strings into a one-column DataFrame, then split each record into its nine fields
json_df = pd.DataFrame(json_list, columns=["new"])
json_df = json_df["new"].str.split(",", expand=True)
col_names = ['Measure', 'County', 'Test Date', 'Positives', 'Death Count', 'Cumulative Positives',
             'Cumulative Deaths', 'Moving Average (Cases)', 'Moving Average (Deaths)']
json_df.columns = col_names
exy02 / data_vis_mining_2_cleaning_3.py
Created September 25, 2020 05:42
How to Extract Data from Online Data Visualizations - 2. Data Cleaning _3
# Strip leftover quotes and brackets, keep only the value after each "key": prefix, then sort
json_df[json_df.columns] = json_df[json_df.columns].replace({'"': '', ']': '', '}': ''}, regex=True)
for col_name in col_names:
    json_df[col_name] = json_df[col_name].str.split(":", expand=True)[1]
json_df = json_df.sort_values(by=['Test Date', 'Measure', 'County'], ascending=[False, False, True])
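Every column still holds strings at this point; if numeric types are needed downstream, a short follow-on sketch (column names taken from col_names above; the date format is an assumption) casts the counts and the test date:
# Optional follow-on: cast count columns to numbers and the test date to a datetime
count_cols = ['Positives', 'Death Count', 'Cumulative Positives', 'Cumulative Deaths',
              'Moving Average (Cases)', 'Moving Average (Deaths)']
json_df[count_cols] = json_df[count_cols].apply(pd.to_numeric, errors='coerce')
json_df['Test Date'] = pd.to_datetime(json_df['Test Date'], errors='coerce')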
exy02 / data_vis_mining_3_exporting_1.py
Created September 25, 2020 05:46
How to Extract Data from Online Data Visualizations - 3. Data Exporting _1
from datetime import date

todays_date = date.today().strftime("%m_%d_%Y")
json_df.to_csv(f'/path/to/directory/Georgia_Extract_({todays_date}).csv', index=False)
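As a quick check, the exported file can be read straight back (the directory path is the same placeholder as above):
# Optional sanity check: re-read the exported CSV and confirm the row count matches
check_df = pd.read_csv(f'/path/to/directory/Georgia_Extract_({todays_date}).csv')
assert len(check_df) == len(json_df)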
exy02 / simple_pgres_query_1.py
Created September 26, 2020 18:49
Simplified PostgreSQL querying (with or without SSH) - Base code
from sshtunnel import SSHTunnelForwarder
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from sqlalchemy import inspect
import pandas as pd


class Postgresql_connect(object):
    def __init__(self, pgres_host, pgres_port, db, ssh, ssh_user, ssh_host, ssh_pkey):
        # SSH Tunnel Variables
        self.pgres_host = pgres_host
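The preview above cuts off inside __init__. The general pattern such a wrapper implements is to open an SSH tunnel when ssh is True, point SQLAlchemy at the tunneled local port, and hand the connection to pandas. Below is a minimal, hypothetical sketch of that pattern, reusing the imports from the base code above; it is not the gist's actual method, the pgres_user / pgres_password parameters are assumptions (the signature above does not show how database credentials are handled), and a PostgreSQL driver such as psycopg2 is assumed to be installed.
# Hypothetical sketch of the tunnel-then-query pattern (not the gist's actual code)
def run_query(sql, pgres_host, pgres_port, db, pgres_user, pgres_password,
              ssh=False, ssh_user=None, ssh_host=None, ssh_pkey=None):
    tunnel = None
    host, port = pgres_host, pgres_port
    if ssh:
        # Forward a local port through the SSH host to the remote PostgreSQL server
        tunnel = SSHTunnelForwarder(ssh_host,
                                    ssh_username=ssh_user,
                                    ssh_pkey=ssh_pkey,
                                    remote_bind_address=(pgres_host, pgres_port))
        tunnel.start()
        host, port = '127.0.0.1', tunnel.local_bind_port
    try:
        # Point SQLAlchemy at either the database directly or the tunneled local port
        engine = create_engine(f'postgresql://{pgres_user}:{pgres_password}@{host}:{port}/{db}')
        return pd.read_sql(sql, engine)
    finally:
        if tunnel is not None:
            tunnel.stop()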
exy02 / simple_pgres_query_2.py
Last active September 26, 2020 20:36
Simplified PostgreSQL querying (with or without SSH) - Class Initialization
# Connection parameters; replace the placeholder values with your own
p_host = '0.0.0.0'
p_port = 5432
db = 'database_name'
ssh = True
ssh_user = 'ssh_user'
ssh_host = 'ip address or web address'
ssh_pkey = '/path/to/user_authentication.pem'

# Initiates a connection to the PostgreSQL database. In this instance we use SSH, so the SSH credentials must be specified.
pgres = Postgresql_connect(pgres_host=p_host, pgres_port=p_port, db=db, ssh=ssh,
                           ssh_user=ssh_user, ssh_host=ssh_host, ssh_pkey=ssh_pkey)
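If the database is reachable directly, the tunnel can presumably be skipped; a sketch assuming ssh=False (the base-code preview does not show parameter defaults, so the SSH arguments are passed explicitly as None):
# No SSH tunnel: connect straight to the database host
pgres = Postgresql_connect(pgres_host='0.0.0.0', pgres_port=5432, db='database_name',
                           ssh=False, ssh_user=None, ssh_host=None, ssh_pkey=None)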
exy02 / simple_pgres_query_3.py
Created September 26, 2020 19:09
Simplified PostgreSQL querying (with or without SSH) - Return Schemas
pgres.schemas(db='database_name')
# Returns the number of schemas and all schema names within the specified database as a pandas DataFrame
exy02 / simple_pgres_query_4.py
Created September 26, 2020 19:11
Simplified PostgreSQL querying (with or without SSH) - Return Tables
pgres.tables(db='database_name', schema='schema_name')
# Returns the number of tables and all table names within the specified schema as a pandas DataFrame
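Putting the two calls together, a small hypothetical sketch that walks every schema in a database and lists its tables; the column names 'schema_name' and 'table_name' are assumptions, since the gists do not show the exact structure of the returned DataFrames:
# Hypothetical walk over every schema and its tables (column names are assumed)
schema_df = pgres.schemas(db='database_name')
for schema_name in schema_df['schema_name']:
    table_df = pgres.tables(db='database_name', schema=schema_name)
    print(schema_name, table_df['table_name'].tolist())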