Last active
December 12, 2017 14:49
-
-
Save tomislacker/63f934723d8e2e360fcda3ab111b4dfd to your computer and use it in GitHub Desktop.
Top Cities (& metro areas) Alphabetically
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import io\n", | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "try:\n", | |
| " from StringIO import StringIO\n", | |
| "except ImportError:\n", | |
| " from io import StringIO\n", | |
| "DATAFILE = '/home/btomasik/aff/PEP_2016_PEPANNRES_with_ann.csv'\n", | |
| "\n", | |
| "# Read in the data file\n", | |
| "with io.open(DATAFILE, mode='r', encoding='iso-8859-1') as infile:\n", | |
| " raw_data_csv = infile.read()\n", | |
| "\n", | |
| "# Create the data frame\n", | |
| "df = pd.read_csv(StringIO(raw_data_csv), skiprows=[1])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Save the first letter of each display label in lowercase\n", | |
| "df['letter'] = df['GEO.display-label'].str.slice(0, 1)\n", | |
| "df['letter'] = df['letter'].str.lower()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Largest Metro by 2010 Census" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GEO.display-label</th>\n", | |
| " <th>rescen42010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>49</th>\n", | |
| " <td>Atlanta-Sandy Springs-Roswell, GA Metro Area</td>\n", | |
| " <td>5286728</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>108</th>\n", | |
| " <td>Boston-Cambridge-Newton, MA-NH Metro Area</td>\n", | |
| " <td>4552402</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>167</th>\n", | |
| " <td>Chicago-Naperville-Elgin, IL-IN-WI Metro Area</td>\n", | |
| " <td>9461105</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>217</th>\n", | |
| " <td>Dallas-Fort Worth-Arlington, TX Metro Area</td>\n", | |
| " <td>6426214</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>271</th>\n", | |
| " <td>El Paso, TX Metro Area</td>\n", | |
| " <td>804123</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>320</th>\n", | |
| " <td>Fresno, CA Metro Area</td>\n", | |
| " <td>930450</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>342</th>\n", | |
| " <td>Grand Rapids-Wyoming, MI Metro Area</td>\n", | |
| " <td>988938</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>394</th>\n", | |
| " <td>Houston-The Woodlands-Sugar Land, TX Metro Area</td>\n", | |
| " <td>5920416</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>406</th>\n", | |
| " <td>Indianapolis-Carmel-Anderson, IN Metro Area</td>\n", | |
| " <td>1887877</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>417</th>\n", | |
| " <td>Jacksonville, FL Metro Area</td>\n", | |
| " <td>1345596</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>440</th>\n", | |
| " <td>Kansas City, MO-KS Metro Area</td>\n", | |
| " <td>2009342</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>506</th>\n", | |
| " <td>Los Angeles-Long Beach-Anaheim, CA Metro Area</td>\n", | |
| " <td>12828837</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>558</th>\n", | |
| " <td>Miami-Fort Lauderdale-West Palm Beach, FL Metr...</td>\n", | |
| " <td>5564635</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>616</th>\n", | |
| " <td>New York-Newark-Jersey City, NY-NJ-PA Metro Area</td>\n", | |
| " <td>19567410</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>642</th>\n", | |
| " <td>Orlando-Kissimmee-Sanford, FL Metro Area</td>\n", | |
| " <td>2134411</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>674</th>\n", | |
| " <td>Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me...</td>\n", | |
| " <td>5965343</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>710</th>\n", | |
| " <td>Quincy, IL-MO Micro Area</td>\n", | |
| " <td>77314</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>725</th>\n", | |
| " <td>Riverside-San Bernardino-Ontario, CA Metro Area</td>\n", | |
| " <td>4224851</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>765</th>\n", | |
| " <td>San Francisco-Oakland-Hayward, CA Metro Area</td>\n", | |
| " <td>4335391</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>846</th>\n", | |
| " <td>Tampa-St. Petersburg-Clearwater, FL Metro Area</td>\n", | |
| " <td>2783243</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>875</th>\n", | |
| " <td>Urban Honolulu, HI Metro Area</td>\n", | |
| " <td>953207</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>891</th>\n", | |
| " <td>Virginia Beach-Norfolk-Newport News, VA-NC Met...</td>\n", | |
| " <td>1676822</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>904</th>\n", | |
| " <td>Washington-Arlington-Alexandria, DC-VA-MD-WV M...</td>\n", | |
| " <td>5636232</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>940</th>\n", | |
| " <td>Youngstown-Warren-Boardman, OH-PA Metro Area</td>\n", | |
| " <td>565773</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>943</th>\n", | |
| " <td>Zanesville, OH Micro Area</td>\n", | |
| " <td>86074</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GEO.display-label rescen42010\n", | |
| "49 Atlanta-Sandy Springs-Roswell, GA Metro Area 5286728\n", | |
| "108 Boston-Cambridge-Newton, MA-NH Metro Area 4552402\n", | |
| "167 Chicago-Naperville-Elgin, IL-IN-WI Metro Area 9461105\n", | |
| "217 Dallas-Fort Worth-Arlington, TX Metro Area 6426214\n", | |
| "271 El Paso, TX Metro Area 804123\n", | |
| "320 Fresno, CA Metro Area 930450\n", | |
| "342 Grand Rapids-Wyoming, MI Metro Area 988938\n", | |
| "394 Houston-The Woodlands-Sugar Land, TX Metro Area 5920416\n", | |
| "406 Indianapolis-Carmel-Anderson, IN Metro Area 1887877\n", | |
| "417 Jacksonville, FL Metro Area 1345596\n", | |
| "440 Kansas City, MO-KS Metro Area 2009342\n", | |
| "506 Los Angeles-Long Beach-Anaheim, CA Metro Area 12828837\n", | |
| "558 Miami-Fort Lauderdale-West Palm Beach, FL Metr... 5564635\n", | |
| "616 New York-Newark-Jersey City, NY-NJ-PA Metro Area 19567410\n", | |
| "642 Orlando-Kissimmee-Sanford, FL Metro Area 2134411\n", | |
| "674 Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me... 5965343\n", | |
| "710 Quincy, IL-MO Micro Area 77314\n", | |
| "725 Riverside-San Bernardino-Ontario, CA Metro Area 4224851\n", | |
| "765 San Francisco-Oakland-Hayward, CA Metro Area 4335391\n", | |
| "846 Tampa-St. Petersburg-Clearwater, FL Metro Area 2783243\n", | |
| "875 Urban Honolulu, HI Metro Area 953207\n", | |
| "891 Virginia Beach-Norfolk-Newport News, VA-NC Met... 1676822\n", | |
| "904 Washington-Arlington-Alexandria, DC-VA-MD-WV M... 5636232\n", | |
| "940 Youngstown-Warren-Boardman, OH-PA Metro Area 565773\n", | |
| "943 Zanesville, OH Micro Area 86074" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['is_largest'] = df.groupby(['letter'])['rescen42010'].transform(max) == df['rescen42010']\n", | |
| "df[df['is_largest']][['GEO.display-label','rescen42010']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Largest by 2016 Cenus Estimation" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GEO.display-label</th>\n", | |
| " <th>respop72016</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>49</th>\n", | |
| " <td>Atlanta-Sandy Springs-Roswell, GA Metro Area</td>\n", | |
| " <td>5789700</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>108</th>\n", | |
| " <td>Boston-Cambridge-Newton, MA-NH Metro Area</td>\n", | |
| " <td>4794447</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>167</th>\n", | |
| " <td>Chicago-Naperville-Elgin, IL-IN-WI Metro Area</td>\n", | |
| " <td>9512999</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>217</th>\n", | |
| " <td>Dallas-Fort Worth-Arlington, TX Metro Area</td>\n", | |
| " <td>7233323</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>271</th>\n", | |
| " <td>El Paso, TX Metro Area</td>\n", | |
| " <td>841971</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>320</th>\n", | |
| " <td>Fresno, CA Metro Area</td>\n", | |
| " <td>979915</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>342</th>\n", | |
| " <td>Grand Rapids-Wyoming, MI Metro Area</td>\n", | |
| " <td>1047099</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>394</th>\n", | |
| " <td>Houston-The Woodlands-Sugar Land, TX Metro Area</td>\n", | |
| " <td>6772470</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>406</th>\n", | |
| " <td>Indianapolis-Carmel-Anderson, IN Metro Area</td>\n", | |
| " <td>2004230</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>417</th>\n", | |
| " <td>Jacksonville, FL Metro Area</td>\n", | |
| " <td>1478212</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>440</th>\n", | |
| " <td>Kansas City, MO-KS Metro Area</td>\n", | |
| " <td>2104509</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>506</th>\n", | |
| " <td>Los Angeles-Long Beach-Anaheim, CA Metro Area</td>\n", | |
| " <td>13310447</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>558</th>\n", | |
| " <td>Miami-Fort Lauderdale-West Palm Beach, FL Metr...</td>\n", | |
| " <td>6066387</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>616</th>\n", | |
| " <td>New York-Newark-Jersey City, NY-NJ-PA Metro Area</td>\n", | |
| " <td>20153634</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>642</th>\n", | |
| " <td>Orlando-Kissimmee-Sanford, FL Metro Area</td>\n", | |
| " <td>2441257</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>674</th>\n", | |
| " <td>Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me...</td>\n", | |
| " <td>6070500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>710</th>\n", | |
| " <td>Quincy, IL-MO Micro Area</td>\n", | |
| " <td>76712</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>725</th>\n", | |
| " <td>Riverside-San Bernardino-Ontario, CA Metro Area</td>\n", | |
| " <td>4527837</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>765</th>\n", | |
| " <td>San Francisco-Oakland-Hayward, CA Metro Area</td>\n", | |
| " <td>4679166</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>846</th>\n", | |
| " <td>Tampa-St. Petersburg-Clearwater, FL Metro Area</td>\n", | |
| " <td>3032171</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>875</th>\n", | |
| " <td>Urban Honolulu, HI Metro Area</td>\n", | |
| " <td>992605</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>891</th>\n", | |
| " <td>Virginia Beach-Norfolk-Newport News, VA-NC Met...</td>\n", | |
| " <td>1726907</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>904</th>\n", | |
| " <td>Washington-Arlington-Alexandria, DC-VA-MD-WV M...</td>\n", | |
| " <td>6131977</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>940</th>\n", | |
| " <td>Youngstown-Warren-Boardman, OH-PA Metro Area</td>\n", | |
| " <td>544746</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>943</th>\n", | |
| " <td>Zanesville, OH Micro Area</td>\n", | |
| " <td>86068</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GEO.display-label respop72016\n", | |
| "49 Atlanta-Sandy Springs-Roswell, GA Metro Area 5789700\n", | |
| "108 Boston-Cambridge-Newton, MA-NH Metro Area 4794447\n", | |
| "167 Chicago-Naperville-Elgin, IL-IN-WI Metro Area 9512999\n", | |
| "217 Dallas-Fort Worth-Arlington, TX Metro Area 7233323\n", | |
| "271 El Paso, TX Metro Area 841971\n", | |
| "320 Fresno, CA Metro Area 979915\n", | |
| "342 Grand Rapids-Wyoming, MI Metro Area 1047099\n", | |
| "394 Houston-The Woodlands-Sugar Land, TX Metro Area 6772470\n", | |
| "406 Indianapolis-Carmel-Anderson, IN Metro Area 2004230\n", | |
| "417 Jacksonville, FL Metro Area 1478212\n", | |
| "440 Kansas City, MO-KS Metro Area 2104509\n", | |
| "506 Los Angeles-Long Beach-Anaheim, CA Metro Area 13310447\n", | |
| "558 Miami-Fort Lauderdale-West Palm Beach, FL Metr... 6066387\n", | |
| "616 New York-Newark-Jersey City, NY-NJ-PA Metro Area 20153634\n", | |
| "642 Orlando-Kissimmee-Sanford, FL Metro Area 2441257\n", | |
| "674 Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me... 6070500\n", | |
| "710 Quincy, IL-MO Micro Area 76712\n", | |
| "725 Riverside-San Bernardino-Ontario, CA Metro Area 4527837\n", | |
| "765 San Francisco-Oakland-Hayward, CA Metro Area 4679166\n", | |
| "846 Tampa-St. Petersburg-Clearwater, FL Metro Area 3032171\n", | |
| "875 Urban Honolulu, HI Metro Area 992605\n", | |
| "891 Virginia Beach-Norfolk-Newport News, VA-NC Met... 1726907\n", | |
| "904 Washington-Arlington-Alexandria, DC-VA-MD-WV M... 6131977\n", | |
| "940 Youngstown-Warren-Boardman, OH-PA Metro Area 544746\n", | |
| "943 Zanesville, OH Micro Area 86068" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['is_largest'] = df.groupby(['letter'])['respop72016'].transform(max) == df['respop72016']\n", | |
| "df[df['is_largest']][['GEO.display-label','respop72016']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Caveats\n", | |
| "There is no metro for the letter `x`..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "DATAFILE = '/home/btomasik/PEP_2016_PEPANNRSIP.US12A/PEP_2016_PEPANNRSIP.US12A_with_ann.csv'\n", | |
| "\n", | |
| "# Read in the data file\n", | |
| "with io.open(DATAFILE, mode='r', encoding='iso-8859-1') as infile:\n", | |
| " raw_data_csv = infile.read()\n", | |
| "\n", | |
| "# Create the data frame\n", | |
| "df = pd.read_csv(StringIO(raw_data_csv), skiprows=[1])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GEO.id</th>\n", | |
| " <th>GEO.id2</th>\n", | |
| " <th>GEO.display-label</th>\n", | |
| " <th>GC_RANK.target-geo-id</th>\n", | |
| " <th>GC_RANK.target-geo-id2</th>\n", | |
| " <th>GC_RANK.rank-label</th>\n", | |
| " <th>GC_RANK.display-label</th>\n", | |
| " <th>GC_RANK.display-label.1</th>\n", | |
| " <th>rescensus42010</th>\n", | |
| " <th>resbase42010</th>\n", | |
| " <th>respop72010</th>\n", | |
| " <th>respop72011</th>\n", | |
| " <th>respop72012</th>\n", | |
| " <th>respop72013</th>\n", | |
| " <th>respop72014</th>\n", | |
| " <th>respop72015</th>\n", | |
| " <th>respop72016</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0100000US</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>United States</td>\n", | |
| " <td>1620000US3651000</td>\n", | |
| " <td>3651000</td>\n", | |
| " <td>1</td>\n", | |
| " <td>United States - New York city, New York</td>\n", | |
| " <td>New York city, New York</td>\n", | |
| " <td>8175133</td>\n", | |
| " <td>8174962</td>\n", | |
| " <td>8192026</td>\n", | |
| " <td>8284098</td>\n", | |
| " <td>8361179</td>\n", | |
| " <td>8422460</td>\n", | |
| " <td>8471990</td>\n", | |
| " <td>8516502</td>\n", | |
| " <td>8537673</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0100000US</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>United States</td>\n", | |
| " <td>1620000US0644000</td>\n", | |
| " <td>644000</td>\n", | |
| " <td>2</td>\n", | |
| " <td>United States - Los Angeles city, California</td>\n", | |
| " <td>Los Angeles city, California</td>\n", | |
| " <td>3792621</td>\n", | |
| " <td>3792584</td>\n", | |
| " <td>3796292</td>\n", | |
| " <td>3825393</td>\n", | |
| " <td>3858137</td>\n", | |
| " <td>3890436</td>\n", | |
| " <td>3920173</td>\n", | |
| " <td>3949149</td>\n", | |
| " <td>3976322</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0100000US</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>United States</td>\n", | |
| " <td>1620000US1714000</td>\n", | |
| " <td>1714000</td>\n", | |
| " <td>3</td>\n", | |
| " <td>United States - Chicago city, Illinois</td>\n", | |
| " <td>Chicago city, Illinois</td>\n", | |
| " <td>2695598</td>\n", | |
| " <td>2695620</td>\n", | |
| " <td>2697736</td>\n", | |
| " <td>2705404</td>\n", | |
| " <td>2714120</td>\n", | |
| " <td>2718887</td>\n", | |
| " <td>2718530</td>\n", | |
| " <td>2713596</td>\n", | |
| " <td>2704958</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0100000US</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>United States</td>\n", | |
| " <td>1620000US4835000</td>\n", | |
| " <td>4835000</td>\n", | |
| " <td>4</td>\n", | |
| " <td>United States - Houston city, Texas</td>\n", | |
| " <td>Houston city, Texas</td>\n", | |
| " <td>2099451</td>\n", | |
| " <td>2100277</td>\n", | |
| " <td>2105625</td>\n", | |
| " <td>2132157</td>\n", | |
| " <td>2166458</td>\n", | |
| " <td>2204406</td>\n", | |
| " <td>2243999</td>\n", | |
| " <td>2284816</td>\n", | |
| " <td>2303482</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0100000US</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>United States</td>\n", | |
| " <td>1620000US0455000</td>\n", | |
| " <td>455000</td>\n", | |
| " <td>5</td>\n", | |
| " <td>United States - Phoenix city, Arizona</td>\n", | |
| " <td>Phoenix city, Arizona</td>\n", | |
| " <td>1445632</td>\n", | |
| " <td>1447624</td>\n", | |
| " <td>1450629</td>\n", | |
| " <td>1469353</td>\n", | |
| " <td>1499007</td>\n", | |
| " <td>1525562</td>\n", | |
| " <td>1554179</td>\n", | |
| " <td>1582904</td>\n", | |
| " <td>1615017</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GEO.id GEO.id2 GEO.display-label GC_RANK.target-geo-id \\\n", | |
| "0 0100000US NaN United States 1620000US3651000 \n", | |
| "1 0100000US NaN United States 1620000US0644000 \n", | |
| "2 0100000US NaN United States 1620000US1714000 \n", | |
| "3 0100000US NaN United States 1620000US4835000 \n", | |
| "4 0100000US NaN United States 1620000US0455000 \n", | |
| "\n", | |
| " GC_RANK.target-geo-id2 GC_RANK.rank-label \\\n", | |
| "0 3651000 1 \n", | |
| "1 644000 2 \n", | |
| "2 1714000 3 \n", | |
| "3 4835000 4 \n", | |
| "4 455000 5 \n", | |
| "\n", | |
| " GC_RANK.display-label GC_RANK.display-label.1 \\\n", | |
| "0 United States - New York city, New York New York city, New York \n", | |
| "1 United States - Los Angeles city, California Los Angeles city, California \n", | |
| "2 United States - Chicago city, Illinois Chicago city, Illinois \n", | |
| "3 United States - Houston city, Texas Houston city, Texas \n", | |
| "4 United States - Phoenix city, Arizona Phoenix city, Arizona \n", | |
| "\n", | |
| " rescensus42010 resbase42010 respop72010 respop72011 respop72012 \\\n", | |
| "0 8175133 8174962 8192026 8284098 8361179 \n", | |
| "1 3792621 3792584 3796292 3825393 3858137 \n", | |
| "2 2695598 2695620 2697736 2705404 2714120 \n", | |
| "3 2099451 2100277 2105625 2132157 2166458 \n", | |
| "4 1445632 1447624 1450629 1469353 1499007 \n", | |
| "\n", | |
| " respop72013 respop72014 respop72015 respop72016 \n", | |
| "0 8422460 8471990 8516502 8537673 \n", | |
| "1 3890436 3920173 3949149 3976322 \n", | |
| "2 2718887 2718530 2713596 2704958 \n", | |
| "3 2204406 2243999 2284816 2303482 \n", | |
| "4 1525562 1554179 1582904 1615017 " | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Save the first letter of each display label in lowercase\n", | |
| "df['letter'] = df['GC_RANK.display-label.1'].str.slice(0, 1)\n", | |
| "df['letter'] = df['letter'].str.lower()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Largest City by 2010 Census" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GC_RANK.display-label.1</th>\n", | |
| " <th>rescensus42010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>310</th>\n", | |
| " <td>Albany city, New York</td>\n", | |
| " <td>97856</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>277</th>\n", | |
| " <td>Broken Arrow city, Oklahoma</td>\n", | |
| " <td>98850</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>312</th>\n", | |
| " <td>Compton city, California</td>\n", | |
| " <td>96455</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>295</th>\n", | |
| " <td>Davenport city, Iowa</td>\n", | |
| " <td>99685</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>292</th>\n", | |
| " <td>El Cajon city, California</td>\n", | |
| " <td>99478</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>315</th>\n", | |
| " <td>Federal Way city, Washington</td>\n", | |
| " <td>89306</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>290</th>\n", | |
| " <td>Greeley city, Colorado</td>\n", | |
| " <td>92889</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>284</th>\n", | |
| " <td>Hillsboro city, Oregon</td>\n", | |
| " <td>91611</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>Indianapolis city (balance), Indiana</td>\n", | |
| " <td>820445</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>Jacksonville city, Florida</td>\n", | |
| " <td>821784</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>304</th>\n", | |
| " <td>Kenosha city, Wisconsin</td>\n", | |
| " <td>99218</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>298</th>\n", | |
| " <td>Las Cruces city, New Mexico</td>\n", | |
| " <td>97618</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>316</th>\n", | |
| " <td>Mission Viejo city, California</td>\n", | |
| " <td>93305</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>271</th>\n", | |
| " <td>North Charleston city, South Carolina</td>\n", | |
| " <td>97471</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>233</th>\n", | |
| " <td>Odessa city, Texas</td>\n", | |
| " <td>99940</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>270</th>\n", | |
| " <td>Pompano Beach city, Florida</td>\n", | |
| " <td>99845</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>337</th>\n", | |
| " <td>Quincy city, Massachusetts</td>\n", | |
| " <td>92271</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>226</th>\n", | |
| " <td>Round Rock city, Texas</td>\n", | |
| " <td>99887</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>281</th>\n", | |
| " <td>Santa Maria city, California</td>\n", | |
| " <td>99553</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>286</th>\n", | |
| " <td>Tyler city, Texas</td>\n", | |
| " <td>96900</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>447</th>\n", | |
| " <td>Upland city, California</td>\n", | |
| " <td>73732</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>300</th>\n", | |
| " <td>Vista city, California</td>\n", | |
| " <td>93834</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>274</th>\n", | |
| " <td>West Palm Beach city, Florida</td>\n", | |
| " <td>99919</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>329</th>\n", | |
| " <td>Yuma city, Arizona</td>\n", | |
| " <td>93064</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GC_RANK.display-label.1 rescensus42010\n", | |
| "310 Albany city, New York 97856\n", | |
| "277 Broken Arrow city, Oklahoma 98850\n", | |
| "312 Compton city, California 96455\n", | |
| "295 Davenport city, Iowa 99685\n", | |
| "292 El Cajon city, California 99478\n", | |
| "315 Federal Way city, Washington 89306\n", | |
| "290 Greeley city, Colorado 92889\n", | |
| "284 Hillsboro city, Oregon 91611\n", | |
| "14 Indianapolis city (balance), Indiana 820445\n", | |
| "11 Jacksonville city, Florida 821784\n", | |
| "304 Kenosha city, Wisconsin 99218\n", | |
| "298 Las Cruces city, New Mexico 97618\n", | |
| "316 Mission Viejo city, California 93305\n", | |
| "271 North Charleston city, South Carolina 97471\n", | |
| "233 Odessa city, Texas 99940\n", | |
| "270 Pompano Beach city, Florida 99845\n", | |
| "337 Quincy city, Massachusetts 92271\n", | |
| "226 Round Rock city, Texas 99887\n", | |
| "281 Santa Maria city, California 99553\n", | |
| "286 Tyler city, Texas 96900\n", | |
| "447 Upland city, California 73732\n", | |
| "300 Vista city, California 93834\n", | |
| "274 West Palm Beach city, Florida 99919\n", | |
| "329 Yuma city, Arizona 93064" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['is_largest'] = df.groupby(['letter'])['rescensus42010'].transform(max) == df['rescensus42010']\n", | |
| "df[df['is_largest']][['GC_RANK.display-label.1','rescensus42010']].sort_values('GC_RANK.display-label.1')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Largest City by 2016 Census Estimation" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GC_RANK.display-label.1</th>\n", | |
| " <th>respop72016</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>Austin city, Texas</td>\n", | |
| " <td>947890</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>Boston city, Massachusetts</td>\n", | |
| " <td>673184</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Chicago city, Illinois</td>\n", | |
| " <td>2704958</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>Dallas city, Texas</td>\n", | |
| " <td>1317929</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>El Paso city, Texas</td>\n", | |
| " <td>683080</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>Fort Worth city, Texas</td>\n", | |
| " <td>854113</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>67</th>\n", | |
| " <td>Greensboro city, North Carolina</td>\n", | |
| " <td>287027</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Houston city, Texas</td>\n", | |
| " <td>2303482</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>Indianapolis city (balance), Indiana</td>\n", | |
| " <td>855164</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>Jacksonville city, Florida</td>\n", | |
| " <td>880619</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>36</th>\n", | |
| " <td>Kansas City city, Missouri</td>\n", | |
| " <td>481420</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Los Angeles city, California</td>\n", | |
| " <td>3976322</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>24</th>\n", | |
| " <td>Memphis city, Tennessee</td>\n", | |
| " <td>652717</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York city, New York</td>\n", | |
| " <td>8537673</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>26</th>\n", | |
| " <td>Oklahoma City city, Oklahoma</td>\n", | |
| " <td>638367</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Phoenix city, Arizona</td>\n", | |
| " <td>1615017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>337</th>\n", | |
| " <td>Quincy city, Massachusetts</td>\n", | |
| " <td>93688</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>40</th>\n", | |
| " <td>Raleigh city, North Carolina</td>\n", | |
| " <td>458880</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>San Antonio city, Texas</td>\n", | |
| " <td>1492510</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>32</th>\n", | |
| " <td>Tucson city, Arizona</td>\n", | |
| " <td>530706</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>54</th>\n", | |
| " <td>Urban Honolulu CDP, Hawaii</td>\n", | |
| " <td>351792</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>42</th>\n", | |
| " <td>Virginia Beach city, Virginia</td>\n", | |
| " <td>452602</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>Washington city, District of Columbia</td>\n", | |
| " <td>681170</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>112</th>\n", | |
| " <td>Yonkers city, New York</td>\n", | |
| " <td>200807</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GC_RANK.display-label.1 respop72016\n", | |
| "10 Austin city, Texas 947890\n", | |
| "21 Boston city, Massachusetts 673184\n", | |
| "2 Chicago city, Illinois 2704958\n", | |
| "8 Dallas city, Texas 1317929\n", | |
| "19 El Paso city, Texas 683080\n", | |
| "15 Fort Worth city, Texas 854113\n", | |
| "67 Greensboro city, North Carolina 287027\n", | |
| "3 Houston city, Texas 2303482\n", | |
| "14 Indianapolis city (balance), Indiana 855164\n", | |
| "11 Jacksonville city, Florida 880619\n", | |
| "36 Kansas City city, Missouri 481420\n", | |
| "1 Los Angeles city, California 3976322\n", | |
| "24 Memphis city, Tennessee 652717\n", | |
| "0 New York city, New York 8537673\n", | |
| "26 Oklahoma City city, Oklahoma 638367\n", | |
| "4 Phoenix city, Arizona 1615017\n", | |
| "337 Quincy city, Massachusetts 93688\n", | |
| "40 Raleigh city, North Carolina 458880\n", | |
| "6 San Antonio city, Texas 1492510\n", | |
| "32 Tucson city, Arizona 530706\n", | |
| "54 Urban Honolulu CDP, Hawaii 351792\n", | |
| "42 Virginia Beach city, Virginia 452602\n", | |
| "20 Washington city, District of Columbia 681170\n", | |
| "112 Yonkers city, New York 200807" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['is_largest'] = df.groupby(['letter'])['respop72016'].transform(max) == df['respop72016']\n", | |
| "df[df['is_largest']][['GC_RANK.display-label.1','respop72016']].sort_values('GC_RANK.display-label.1')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Caveats\n", | |
| "There is no city for the letters `x`, `z`..." | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment