Created
September 20, 2024 19:08
-
-
Save devxpy/3c108eb2b199cdbcad80746192c80aa7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from babel.numbers import format_currency | |
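# from gooeysite import wsgi
#
# assert wsgi
# import streamlit as gui
# NOTE(review): `gui` is used throughout this script, but the only visible import
# that binds it is commented out above. Confirm which Streamlit-compatible module
# should be imported as `gui` before running.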
import plotly.graph_objects as go | |
import pandas as pd | |
# Use the full page width for the selectors, tables and chart rendered below.
gui.set_page_config(layout="wide")

# Load the semicolon-separated data and metadata exports.
# `df` is later filtered on its "variable", "year" and "percentile" columns and
# its "value" column is plotted; `meta` supplies the human-readable description
# of each variable code.
# NOTE(review): both paths are absolute and machine-specific — adjust them to
# wherever the WID export was downloaded.
df = pd.read_csv("/Users/Dev/Downloads/wid_all_data/WID_data_US.csv", sep=";")
meta = pd.read_csv("/Users/Dev/Downloads/wid_all_data/WID_metadata_US.csv", sep=";")
# gui.write(df)
# gui.write(meta)
def parse_percentile(x: str) -> tuple:
    """Parse a percentile code such as ``"p90p99"`` into a tuple of floats.

    The surrounding ``p`` characters are stripped and the remainder is split on
    the inner ``p``, so ``"p90p99"`` becomes ``(90.0, 99.0)`` and
    ``"p99.9p100"`` becomes ``(99.9, 100.0)``.

    Raises:
        ValueError: if any piece between the ``p`` separators is not a number.
    """
    return tuple(map(float, x.strip("p").split("p")))
# Named sets of percentile codes the user can choose between. Each value is a
# list of "p<lower>p<upper>" codes that is matched against the data's
# "percentile" column.
groups = dict(
    key_groups=[
        "p0p100", "p0p50", "p50p90", "p90p99", "p99p100", "p99.99p100",
        # All deciles (p0p10, p10p20, p20p30, …, p90p100)
        *[f"p{i * 10}p{(i + 1) * 10}" for i in range(10)],
    ],
    detailed_p_groups=[
        # Single percentiles below the top 1% (p0p1, p1p2, …, p98p99);
        # the top 1% is covered by the finer brackets that follow.
        *[f"p{i}p{i + 1}" for i in range(99)],
        # Tenths of a percentile in the top 1%
        "p99p99.1", "p99.1p99.2", "p99.2p99.3", "p99.3p99.4", "p99.4p99.5", "p99.5p99.6", "p99.6p99.7", "p99.7p99.8", "p99.8p99.9",
        # "p99.9p100",
        # Hundredths of a percentile in the top 0.1%
        "p99.9p99.91", "p99.91p99.92", "p99.92p99.93", "p99.93p99.94", "p99.94p99.95", "p99.95p99.96", "p99.96p99.97", "p99.97p99.98", "p99.98p99.99",
        # "p99.99p100",
        # Thousandths of a percentile in the top 0.01%
        "p99.99p99.991", "p99.991p99.992", "p99.992p99.993", "p99.993p99.994", "p99.994p99.995", "p99.995p99.996", "p99.996p99.997", "p99.997p99.998", "p99.998p99.999",
        # Everything above p99.999 (the top 0.001%) as a single bracket
        "p99.999p100",
    ],
    # Cumulative "top" groups (p0p100, p1p100, p2p100, …, p99p100)
    detailed_top_groups=[f"p{i}p100" for i in range(100)],
)  # fmt:skip
# Three selectors on one row; the variable picker gets the widest column.
col1, col2, col3 = gui.columns([3, 1, 1])

# Metadata columns joined together to describe a variable code in the picker.
desc_cols = ["shortname", "shorttype", "shortpop", "shortage", "unit"]


def format_variable(v):
    """Return a readable label for variable code ``v``.

    Looks up the first row of ``meta`` whose "variable" column equals ``v`` and
    joins its ``desc_cols`` values with ``" | "``.

    Raises:
        IndexError: if ``meta`` has no row for ``v``.
    """
    return " | ".join(map(str, meta[meta["variable"] == v][desc_cols].values[0]))


with col1:
    variable = gui.selectbox(
        "Select a variable",
        meta["variable"],
        format_func=format_variable,
    )
with col2:
    # Most recent year first.
    year = gui.selectbox("Select a year", sorted(df["year"].unique(), reverse=True))
with col3:
    group_name = gui.selectbox("Select a group", list(groups.keys()))
# Narrow the data to the selected variable, year and percentile group.
df = df[df["variable"] == variable]
df = df[df["year"] == year]
df = df[df["percentile"].isin(groups[group_name])]
if df.empty:
    gui.error("No data available")
    gui.stop()

# Keep only the columns used below. The explicit copy makes the column
# assignments that follow operate on an independent frame rather than on a
# slice of the full dataset.
df = df[["percentile", "value"]].copy()

# Convert percentile codes to (lower, upper) float tuples and show the table
# ordered by value.
df["percentile"] = df["percentile"].apply(parse_percentile)
gui.write(df.sort_values(by="value"), hide_index=True)

# From here on only the lower bound of each bracket is used (cutoff lookup and
# the x-axis of the chart), so collapse the tuple to its first element.
df["percentile"] = df["percentile"].apply(lambda bounds: bounds[0])
df = df.sort_values(by="percentile")
# Summary table: the value at a handful of fixed percentile cutoffs.
# Not every group contains every cutoff (for example, the decile-based group
# has no bracket starting at 95), so a missing or empty value is reported as
# "N/A" instead of raising IndexError/ValueError and aborting the page.
my_records = []
for cutoff in (10, 50, 90, 95, 99, 99.9, 99.99, 99.999):
    matches = df.loc[df["percentile"] == cutoff, "value"].dropna()
    if matches.empty:
        my_records.append({"percentile": cutoff, "value": "N/A"})
        continue
    value = int(matches.iloc[0])
    # NOTE(review): values are always rendered as Indian rupees with Indian
    # digit grouping, regardless of the unit of the selected variable — confirm
    # this is intended.
    inr = format_currency(value, "INR", locale="en_IN")
    my_records.append({"percentile": cutoff, "value": inr})
gui.dataframe(pd.DataFrame.from_records(my_records), hide_index=True)
# Let the user restrict the plotted range of percentile lower bounds.
# Both inputs accept 0–100 in steps of 0.001, matching the finest brackets in
# the detailed groups.
col1, col2 = gui.columns(2)
with col1:
    start = gui.number_input(
        "Start", min_value=0.0, max_value=100.0, step=0.001, value=0.0, format="%.3f"
    )
with col2:
    end = gui.number_input(
        "End", min_value=0.0, max_value=100.0, step=0.001, value=100.0, format="%.3f"
    )
# `between` includes both endpoints by default; if start > end nothing matches
# and the chart below is drawn empty.
df = df[df["percentile"].between(start, end)]
# A log axis makes the lower brackets readable when the top brackets dominate.
YAXIS_TYPE = gui.selectbox("Select a y-axis type", ["linear", "log"])

# Draw the same series twice: as a line and as bars.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df["percentile"],
            y=df["value"],
        ),
        go.Bar(
            x=df["percentile"],
            y=df["value"],
        ),
    ]
)
fig.update_layout(
    title_text="{} in {}".format(format_variable(variable), year),
    xaxis_title_text="Percentile",
    yaxis_title_text="Value",
    # linear or log scale for the y-axis, as selected above
    yaxis_type=YAXIS_TYPE,
    # show y-axis ticks as whole numbers with thousands separators
    yaxis_tickformat=",.0f",
    # add a range slider and autoscale the y-axis
    xaxis=dict(rangeslider=dict(visible=True)),
    yaxis=dict(autorange=True, fixedrange=False),
)
gui.plotly_chart(fig, use_container_width=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment