rchardptrsn’s gists

rchardptrsn / census_app.py

Last active March 10, 2020 16:51

Streamlit Census App

	import streamlit as st
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import censusdata


	# Render the local image file, using its Pixabay source URL as the caption.
	st.image(
	    image='censusimg.jpg',
	    caption='https://pixabay.com/illustrations/magnifying-glass-human-head-faces-1607208/',
	)

rchardptrsn / download_gdp_dataset.py

Last active January 28, 2020 03:25

	import zipfile36 as zipfile
	import wget
	import os

	# Download the World Bank indicator NY.GDP.MKTP.CD as a zip archive and
	# unpack its contents into the current working directory.
	print('Beginning file download with wget module')
	url = 'http://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv'
	# wget.download() returns the path it actually wrote to. When the requested
	# name already exists it saves under a renamed file, so open the returned
	# path rather than the hard-coded name to avoid extracting a stale archive.
	zip_path = wget.download(url, 'gdp data.zip')

	# extractall() has to run inside the `with` block so the archive is open
	# while extracting and is closed afterwards.
	with zipfile.ZipFile(zip_path, "r") as z:
	    z.extractall("")

rchardptrsn / na_plot.py

Created January 29, 2020 12:26

	import matplotlib.pyplot as plt
	import seaborn as sns

	# Keep only the records whose GDP value is missing
	missing_gdp_mask = melted_data['GDP'].isnull()
	gdp_nans = melted_data[missing_gdp_mask]

	# Seaborn count plot: one bar per 'Year' value, sized by the number of missing rows
	plt.figure(figsize=(10, 6))
	ax = sns.countplot(data=gdp_nans, x='Year')
	plt.title('Count of NaNs per Year')

rchardptrsn / gdp_transpose.py

Created February 1, 2020 18:32

	# Reshape `gdp` in one chained pipeline:
	#   1. index the rows by 'Country Name' (replacing the previous index)
	#   2. swap rows and columns
	#   3. turn the resulting index back into a regular column named 'index'
	#   4. rename that column to 'Year'
	#   5. make 'Year' the DataFrame index
	gdp_transpose = (
	    gdp.set_index('Country Name')
	    .transpose()
	    .reset_index()
	    .rename(columns={'index': 'Year'})
	    .set_index('Year')
	)

rchardptrsn / gdp_percent_change.py

Created February 1, 2020 18:38

	# Percent change of gdp_transpose, computed with pandas' default pct_change() settings
	gdp_pct_diff = gdp_transpose.pct_change()
	# Peek at the first rows; as a bare expression the result is only shown in
	# an interactive session or notebook cell.
	gdp_pct_diff.head()

rchardptrsn / gdb_line_plot.py

Last active February 1, 2020 20:51

	import seaborn as sns
	import matplotlib.pyplot as plt
	from matplotlib.collections import LineCollection

	# Initialize figure and size of the plot (figsize is width, height)
	plt.figure(figsize=(14,8))

	# Seaborn line plot of 'Average GDP Pct Change' (y) against 'Year' (x),
	# with both columns read from the `pct_change` object.
	# NOTE(review): this call is cut off in the listing; its remaining
	# arguments and closing parenthesis are not visible here.
	ax = sns.lineplot(x='Year',y='Average GDP Pct Change',
	data=pct_change,

rchardptrsn / cosmos_client.py

Created February 9, 2020 19:27

	import pandas as pd
	import json
	import azure.cosmos.cosmos_client as cosmos_client
	import azure.cosmos.errors as errors
	import azure.cosmos.documents as documents
	import azure.cosmos.http_constants as http_constants

	# Report that the imports above completed without raising.
	import_status = 'Imported packages successfully.'
	print(import_status)

	# Initialize the Cosmos client

rchardptrsn / cosmos_create_db.py

Created February 9, 2020 19:37

	# Create the database, or read the existing one when it is already there.
	# https://docs.microsoft.com/en-us/python/api/azure-cosmos/azure.cosmos.cosmos_client.cosmosclient?view=azure-python#createdatabase-database--options-none-

	database_name = 'HDIdatabase'
	try:
	    database = client.CreateDatabase({'id': database_name})
	except errors.HTTPFailure as e:
	    # Only HTTP 409 (Conflict) means the database already exists. Any other
	    # failure (authentication, throttling, ...) is re-raised so the real
	    # cause is not masked by a follow-up read error.
	    if e.status_code != 409:
	        raise
	    database = client.ReadDatabase("dbs/" + database_name)

rchardptrsn / cosmos_create_container.py

Last active February 9, 2020 20:22

	# Create a collection of items in a Container
	# Items are the individual rows/records of your dataset
	# https://docs.microsoft.com/en-us/python/api/azure-cosmos/azure.cosmos.cosmos_client.cosmosclient?view=azure-python#createcontainer-database-link--collection--options-none-

	# Link string for the 'HDIdatabase' database ('dbs/' prefix + database id)
	database_link = 'dbs/' + 'HDIdatabase'
	# Container definition: id 'HDIcontainer' with a hash partition key on the
	# '/country' path.
	# NOTE(review): this dict literal is cut off in the listing; its closing
	# braces and any further keys are not visible here.
	container_definition = {'id': 'HDIcontainer',
	'partitionKey':
	{
	'paths': ['/country'],
	'kind': documents.PartitionKind.Hash

rchardptrsn / cosmos_download_data.py

Created February 9, 2020 20:14

	# Download and read the csv file, loading every column as a string.
	# The encoding name uses a plain ASCII hyphen ('ISO-8859-1'); the previous
	# literal contained a typographic en dash, which is not the canonical
	# codec name and only resolves through lenient name normalization.
	df = pd.read_csv(
	    'https://globaldatalab.org/assets/2019/09/SHDI%20Complete%203.0.csv',
	    encoding='ISO-8859-1',
	    dtype='str',
	)
	# Reset index - creates a column called 'index'
	df = df.reset_index()
	# Rename that new column 'id'
	# Cosmos DB needs one column named 'id'.
	df = df.rename(columns={'index': 'id'})
	# Convert the id column to a string - this is a document database.
	df['id'] = df['id'].astype(str)

Richard Peterson rchardptrsn