Skip to content

Instantly share code, notes, and snippets.

@rchardptrsn
rchardptrsn / census_app.py
Last active March 10, 2020 16:51
Streamlit Census App
import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import censusdata
# Render the header image for the app; the caption carries the Pixabay
# source URL of the illustration.
st.image(
    image='censusimg.jpg',
    caption='https://pixabay.com/illustrations/magnifying-glass-human-head-faces-1607208/',
)
import zipfile36 as zipfile
import wget
import os
# Download the World Bank GDP indicator archive (NY.GDP.MKTP.CD, CSV format)
# and unpack it.
print('Beginning file download with wget module')
url = 'http://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv'

# Bind the archive name once so the download target and the file that is
# opened for extraction cannot drift apart.
archive_name = 'gdp data.zip'
wget.download(url, archive_name)

# The context manager closes the archive handle even if extraction raises.
with zipfile.ZipFile(archive_name, "r") as z:
    # An empty path extracts relative to the current working directory.
    z.extractall("")
import matplotlib.pyplot as plt
import seaborn as sns
# Build a boolean mask of the rows whose GDP value is missing, then keep
# only those rows.
missing_gdp_mask = melted_data['GDP'].isnull()
gdp_nans = melted_data[missing_gdp_mask]

# Bar chart: one bar per year, bar height = number of missing GDP rows.
plt.figure(figsize=(10, 6))
ax = sns.countplot(data=gdp_nans, x='Year')
plt.title('Count of NaNs per Year')
# Reshape the GDP table so that each row is a year and each column is a
# country, in one chained pipeline:
#   1. index by 'Country Name' (the index was numeric before),
#   2. transpose so the former column labels become the row index,
#   3. reset the index, which materialises those labels as an 'index' column,
#   4. rename that column to 'Year',
#   5. make 'Year' the DataFrame index.
gdp_transpose = (
    gdp.set_index('Country Name')
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Year'})
    .set_index('Year')
)

# Row-over-row percentage change for every country column (pandas defaults).
gdp_pct_diff = gdp_transpose.pct_change()

# Preview of the first rows; only displayed in a notebook/REPL session.
gdp_pct_diff.head()
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
# Initialize figure and size of the plot
plt.figure(figsize=(14,8))
# Seaborn line plot
ax = sns.lineplot(x='Year',y='Average GDP Pct Change',
data=pct_change,
import pandas as pd
import json
import azure.cosmos.cosmos_client as cosmos_client
import azure.cosmos.errors as errors
import azure.cosmos.documents as documents
import azure.cosmos.http_constants as http_constants
print('Imported packages successfully.')
# Initialize the Cosmos client
# NOTE(review): `client` is not constructed anywhere in this excerpt; it must
# already exist (a cosmos_client.CosmosClient instance) before this runs.
# Create a database
# https://docs.microsoft.com/en-us/python/api/azure-cosmos/azure.cosmos.cosmos_client.cosmosclient?view=azure-python#createdatabase-database--options-none-
database_name = 'HDIdatabase'
try:
    database = client.CreateDatabase({'id': database_name})
except errors.HTTPFailure as err:
    # Only a 409 Conflict means "the database already exists". Any other
    # HTTP failure (bad credentials, throttling, ...) is re-raised instead
    # of being masked by a fallback read.
    if err.status_code != http_constants.StatusCodes.CONFLICT:
        raise
    database = client.ReadDatabase("dbs/" + database_name)
# Create a collection of items in a Container
# Items are the individual rows/records of your dataset
# https://docs.microsoft.com/en-us/python/api/azure-cosmos/azure.cosmos.cosmos_client.cosmosclient?view=azure-python#createcontainer-database-link--collection--options-none-
# Database link in 'dbs/<database id>' form; the id literal here is the same
# 'HDIdatabase' string assigned to database_name earlier in the file.
database_link = 'dbs/' + 'HDIdatabase'
container_definition = {'id': 'HDIcontainer',
'partitionKey':
{
'paths': ['/country'],
'kind': documents.PartitionKind.Hash
# Fetch the SHDI csv straight from its URL, reading every column as a string.
# NOTE(review): the encoding name below contains an en dash (U+2013), not an
# ASCII hyphen; left untouched here — confirm it resolves to ISO-8859-1 in
# the target environment.
df = pd.read_csv(
    'https://globaldatalab.org/assets/2019/09/SHDI%20Complete%203.0.csv',
    encoding='ISO-8859–1',
    dtype='str',
)

# Cosmos DB needs one column named 'id': turn the row index into a column
# (reset_index names it 'index') and rename that column in the same step.
df = df.reset_index().rename(columns={'index': 'id'})

# This is a document database, so the id values are stored as strings.
df['id'] = df['id'].astype(str)