Skip to content

Instantly share code, notes, and snippets.

@knaaptime
Created December 11, 2019 19:55
Show Gist options
  • Save knaaptime/6cd4004aeedb5779958a138c7cad216a to your computer and use it in GitHub Desktop.
Save knaaptime/6cd4004aeedb5779958a138c7cad216a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from geosnap import Community"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"# Directory that holds the input shapefile; change this one line if the data lives elsewhere.\n",
"DATA_DIR = Path.home() / \"Downloads\"\n",
"\n",
"# Build the path from the current user's home directory instead of a hard-coded\n",
"# /Users/<name> prefix, so the notebook is not tied to a single machine.\n",
"# str() hands geopandas a plain string path, the same argument type as before.\n",
"shp = gpd.read_file(str(DATA_DIR / \"10180.shp\"))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"d = Community.from_geodataframes([shp])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Add a constant `year` column (2005) to every row, building a new frame rather than\n",
"# writing into the existing one, then store it back on the community.\n",
"d.gdf = d.gdf.assign(year=2005)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"test = Community.from_lodes(msa_fips='10180')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>geoid</th>\n",
" <th>year</th>\n",
" <th>CFA01</th>\n",
" <th>CFA02</th>\n",
" <th>CFA03</th>\n",
" <th>CFA04</th>\n",
" <th>CFA05</th>\n",
" <th>CFS01</th>\n",
" <th>CFS02</th>\n",
" <th>CFS03</th>\n",
" <th>...</th>\n",
" <th>naics_55</th>\n",
" <th>naics_56</th>\n",
" <th>naics_61</th>\n",
" <th>naics_62</th>\n",
" <th>naics_71</th>\n",
" <th>naics_72</th>\n",
" <th>naics_81</th>\n",
" <th>naics_90</th>\n",
" <th>population</th>\n",
" <th>total_employees</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>480590302003239</td>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>480590301012043</td>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>53</td>\n",
" <td>3</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>480590302002344</td>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>51</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>480590302004082</td>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>480590302001064</td>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>21</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 57 columns</p>\n",
"</div>"
],
"text/plain": [
" geoid year CFA01 CFA02 CFA03 CFA04 CFA05 CFS01 CFS02 \\\n",
"0 480590302003239 2015 0 0 0 0 0 0 0 \n",
"1 480590301012043 2015 0 0 0 0 0 0 0 \n",
"2 480590302002344 2015 0 0 0 0 0 0 0 \n",
"3 480590302004082 2015 0 0 0 0 0 0 0 \n",
"4 480590302001064 2015 0 0 0 0 0 0 0 \n",
"\n",
" CFS03 ... naics_55 naics_56 naics_61 naics_62 naics_71 naics_72 \\\n",
"0 0 ... 0 0 0 0 0 0 \n",
"1 0 ... 0 0 0 0 0 0 \n",
"2 0 ... 0 0 0 0 0 0 \n",
"3 0 ... 0 0 0 0 0 0 \n",
"4 0 ... 0 0 0 0 0 0 \n",
"\n",
" naics_81 naics_90 population total_employees \n",
"0 3 0 8 3 \n",
"1 2 53 3 57 \n",
"2 0 0 51 2 \n",
"3 0 0 9 11 \n",
"4 0 0 21 1 \n",
"\n",
"[5 rows x 57 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.gdf.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the column labels whose name begins with 'naics', in their original order.\n",
"cols = d.gdf.columns[[name.startswith('naics') for name in d.gdf.columns]]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['naics_11', 'naics_21', 'naics_22', 'naics_23', 'naics_42', 'naics_51',\n",
" 'naics_52', 'naics_53', 'naics_54', 'naics_55', 'naics_56', 'naics_61',\n",
" 'naics_62', 'naics_71', 'naics_72', 'naics_81', 'naics_90'],\n",
" dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>geoid</th>\n",
" <th>geometry</th>\n",
" <th>naics_11</th>\n",
" <th>naics_21</th>\n",
" <th>naics_22</th>\n",
" <th>naics_23</th>\n",
" <th>naics_42</th>\n",
" <th>naics_51</th>\n",
" <th>naics_52</th>\n",
" <th>naics_53</th>\n",
" <th>naics_54</th>\n",
" <th>naics_55</th>\n",
" <th>naics_56</th>\n",
" <th>naics_61</th>\n",
" <th>naics_62</th>\n",
" <th>naics_71</th>\n",
" <th>naics_72</th>\n",
" <th>naics_81</th>\n",
" <th>naics_90</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>480590302004004</td>\n",
" <td>POLYGON ((-99.155506 32.128224, -99.1568669999...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>480590302004017</td>\n",
" <td>POLYGON ((-99.162854 32.132892, -99.163344 32....</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>480590302003050</td>\n",
" <td>POLYGON ((-99.167773 32.131983, -99.16754 32.1...</td>\n",
" <td>0.0</td>\n",
" <td>2.985586</td>\n",
" <td>0.0</td>\n",
" <td>1.079303</td>\n",
" <td>1.452553</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>8.497436</td>\n",
" <td>2.414572</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.981199</td>\n",
" <td>0.775023</td>\n",
" <td>0.0</td>\n",
" <td>0.600689</td>\n",
" <td>1.470928</td>\n",
" <td>1.034817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>480590302003245</td>\n",
" <td>POLYGON ((-99.413296 32.102523, -99.4118299999...</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>8.094773</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>18.109290</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>480590302003179</td>\n",
" <td>POLYGON ((-99.352863 32.248778, -99.3531709999...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" geoid geometry \\\n",
"0 480590302004004 POLYGON ((-99.155506 32.128224, -99.1568669999... \n",
"1 480590302004017 POLYGON ((-99.162854 32.132892, -99.163344 32.... \n",
"2 480590302003050 POLYGON ((-99.167773 32.131983, -99.16754 32.1... \n",
"3 480590302003245 POLYGON ((-99.413296 32.102523, -99.4118299999... \n",
"4 480590302003179 POLYGON ((-99.352863 32.248778, -99.3531709999... \n",
"\n",
" naics_11 naics_21 naics_22 naics_23 naics_42 naics_51 naics_52 \\\n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN NaN \n",
"2 0.0 2.985586 0.0 1.079303 1.452553 0.0 0.0 \n",
"3 0.0 0.000000 0.0 8.094773 0.000000 0.0 0.0 \n",
"4 NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
" naics_53 naics_54 naics_55 naics_56 naics_61 naics_62 naics_71 \\\n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN NaN \n",
"2 8.497436 2.414572 0.0 0.0 0.981199 0.775023 0.0 \n",
"3 0.000000 18.109290 0.0 0.0 0.000000 0.000000 0.0 \n",
"4 NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
" naics_72 naics_81 naics_90 \n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 0.600689 1.470928 1.034817 \n",
"3 0.000000 0.000000 0.000000 \n",
"4 NaN NaN NaN "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.gdf.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/knaaptime/projects/geosnap/geosnap/analyze/cluster.py:264: UserWarning: Note: Gaussian Mixture Clustering is probabilistic--cluster labels may be different for different runs. If you need consistency, you should set the `random_state` parameter\n",
" \"Note: Gaussian Mixture Clustering is probabilistic--\"\n"
]
}
],
"source": [
"# Seed for the mixture model; any fixed integer works.\n",
"RANDOM_STATE = 42\n",
"\n",
"# Passing random_state pins the cluster labels across runs; when it is omitted geosnap\n",
"# emits a UserWarning that the labels may be different for different runs.\n",
"d = d.cluster(columns=cols, method='gaussian_mixture', best_model=True, random_state=RANDOM_STATE)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Reproject the geometries to EPSG:3857 and store the result back on the community.\n",
"d.gdf = d.gdf.to_crs(epsg=3857)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# contextily is imported here because the notebook's own `import contextily` cell sits\n",
"# below this one; a top-to-bottom run would otherwise raise NameError on add_basemap.\n",
"import contextily\n",
"\n",
"fig, ax = plt.subplots()\n",
"# Rows whose 'gaussian_mixture' label is NaN are dropped before plotting.\n",
"d.gdf.dropna(subset=['gaussian_mixture']).plot(column='gaussian_mixture', categorical=True, ax=ax)\n",
"# Add a contextily basemap to the same axes as the cluster map.\n",
"contextily.add_basemap(ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import contextily"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:geosnap]",
"language": "python",
"name": "conda-env-geosnap-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment