Skip to content

Instantly share code, notes, and snippets.

# Map showing counties with typical income < 40k
# Only the rows of data_df whose Median_Household_Income_2018 is below 40000
# are plotted. The colour range is still fixed at 0-100000.
tmp_df = data_df[data_df["Median_Household_Income_2018"] < 40000]
fig = px.choropleth_mapbox(
    tmp_df,
    geojson=counties,
    locations="fips",
    color="Median_Household_Income_2018",
    color_continuous_scale=px.colors.diverging.RdYlBu,
    range_color=[0, 100000],
    labels=labels,
)
# Colour-bar ticks every 20000, written in thousands; the top tick reads "100k+".
fig.update_layout(
    coloraxis_colorbar={
        "tickvals": [0, 20000, 40000, 60000, 80000, 100000],
        "ticktext": ["0", "20k", "40k", "60k", "80k", "100k+"],
    }
)
fig.update_layout(mapbox_style="carto-positron",
# Choropleth of Median_Household_Income_2018 for every row of data_df,
# keyed to the `counties` GeoJSON through the "fips" column.
fig = px.choropleth_mapbox(
    data_df,
    geojson=counties,
    locations="fips",
    color="Median_Household_Income_2018",
    color_continuous_scale=px.colors.diverging.RdYlBu,
    range_color=[0, 100000],
    labels=labels,
)
# Colour-bar ticks every 20000, written in thousands; the top tick reads "100k+".
fig.update_layout(
    coloraxis_colorbar={
        "tickvals": [0, 20000, 40000, 60000, 80000, 100000],
        "ticktext": ["0", "20k", "40k", "60k", "80k", "100k+"],
    }
)
# Base-map style, initial zoom and centre point, and zero margins on all sides.
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=3.9,
    mapbox_center=dict(lat=37.0902, lon=-95.7129),
    margin=dict(r=0, t=0, l=0, b=0),
)
import pandas as pd
import numpy as np
import plotly.express as px
import streamlit as st
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import svm
from sklearn import metrics
import pandas as pd
import numpy as np
import plotly.express as px
import streamlit as st
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import svm
from sklearn import metrics
import pandas as pd
# ===== START SCRAPING =====
import requests
from bs4 import BeautifulSoup
import re
def scrape_this(uri="/pages/forms/"):
# Collect every <tr class="team"> element under `table`.
# NOTE(review): `table` is not defined in the visible snippet; it is presumably
# a BeautifulSoup element created earlier - confirm.
# Original author's note: "Includes the header row!" - verify against the page markup.
data_rows = table.find_all("tr", attrs={"class": "team"})

# The first CSS class of each <td> in the first row is used as the column key.
# When no rows matched there is nothing to read keys from, so fall back to an
# empty key list instead of raising IndexError on data_rows[0].
stat_keys = (
    [col.attrs["class"][0] for col in data_rows[0].find_all("td")]
    if data_rows
    else []
)

# One dict per row: column key -> cell text with surrounding whitespace removed.
# str.strip() replaces the former re.sub(r"^\s+|\s+$", "", ...) call.
parsed_data = [
    {attr: row.find(attrs={"class": attr}).text.strip() for attr in stat_keys}
    for row in data_rows
]
>>> team_elms
<tr class="team">
<td class="name">
Boston Bruins
</td>
<td class="year">
1990
</td>
<td class="wins">
44
>>> per_poss_df
name g mp ... pts link season
0 Sacramento Kings 82 20080 ... 105.6 /teams/SAC/2001.html 2001
1 Milwaukee Bucks 82 19780 ... 108.8 /teams/MIL/2001.html 2001
2 Los Angeles Lakers 82 19905 ... 108.4 /teams/LAL/2001.html 2001
3 Dallas Mavericks 82 19805 ... 107.1 /teams/DAL/2001.html 2001
4 Toronto Raptors 82 19955 ... 105.9 /teams/TOR/2001.html 2001
.. ... .. ... ... ... ... ...
25 New York Knicks 66 15965 ... 106.5 /teams/NYK/2020.html 2020
26 Cleveland Cavaliers 65 15725 ... 107.5 /teams/CLE/2020.html 2020
# ========== (c) JP Hwang 22/8/20 ==========
import logging
# ===== START LOGGER =====
# Logger named after this module.
logger = logging.getLogger(__name__)

# Record layout (timestamp, logger name, level, message) and a stream handler.
# NOTE(review): within the visible lines the formatter is not set on the
# handler and the handler is not added to any logger - confirm that happens
# further down.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
sh = logging.StreamHandler()

# The root logger is set to INFO.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
>>> per_gm_df
name g mp ... pf pts link
0 Dallas Mavericks 75 242.3 ... 19.5 117.0 /teams/DAL/2020.html
1 Milwaukee Bucks 73 241.0 ... 19.6 118.7 /teams/MIL/2020.html
2 Portland Trail Blazers 74 241.0 ... 21.7 115.0 /teams/POR/2020.html
3 Houston Rockets 72 241.4 ... 21.8 117.8 /teams/HOU/2020.html
4 Los Angeles Clippers 72 241.4 ... 22.1 116.3 /teams/LAC/2020.html
5 New Orleans Pelicans 72 242.1 ... 21.2 115.8 /teams/NOP/2020.html
6 Phoenix Suns 73 241.0 ... 22.0 113.6 /teams/PHO/2020.html
7 Washington Wizards 72 241.0 ... 22.7 114.4 /teams/WAS/2020.html