Created
April 16, 2020 03:53
-
-
Save panicpotatoe/b323e42d390ff92090df3a5759730c75 to your computer and use it in GitHub Desktop.
Created on Skills Network Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Peer-Graded Assignment : Segmenting and Clustering Neighborhoods in Toronto (Part 1 & 2)\n", | |
| "- Build a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name in Toronto.\n", | |
| "- Get the geographical coordinates of the neighborhoods in Toronto.\n", | |
| "***\n", | |
| "### 1. Import libraries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Libraries imported.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import numpy as np # library to handle data in a vectorized manner\n", | |
| "\n", | |
| "import pandas as pd # library for data analysis\n", | |
| "pd.set_option(\"display.max_columns\", None)\n", | |
| "pd.set_option(\"display.max_rows\", None)\n", | |
| "\n", | |
| "import json # library to handle JSON files\n", | |
| "\n", | |
| "from geopy.geocoders import Nominatim # convert an address into latitude and longitude values\n", | |
| "\n", | |
| "import requests # library to handle requests\n", | |
| "from bs4 import BeautifulSoup # library to parse HTML and XML documents\n", | |
| "\n", | |
| "from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe\n", | |
| "\n", | |
| "# Matplotlib and associated plotting modules\n", | |
| "import matplotlib.cm as cm\n", | |
| "import matplotlib.colors as colors\n", | |
| "\n", | |
| "# import k-means from clustering stage\n", | |
| "from sklearn.cluster import KMeans\n", | |
| "\n", | |
| "import folium # map rendering library\n", | |
| "\n", | |
| "print(\"Libraries imported.\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 2. Scrape data from the Wikipedia page into a DataFrame" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# send the GET request\n", | |
| "data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# parse data from the html into a beautifulsoup object\n", | |
| "soup = BeautifulSoup(data, 'html.parser')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# create three lists to store table data\n", | |
| "postalCodeList = []\n", | |
| "boroughList = []\n", | |
| "neighborhoodList = []" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "**Using BeautifulSoup**\n", | |
| "\n", | |
| "```python\n", | |
| "# find the table\n", | |
| "soup.find('table')\n", | |
| "\n", | |
| "# find all the rows of the table\n", | |
| "soup.find('table').find_all('tr')\n", | |
| "\n", | |
| "# for each row of the table, find all the table data\n", | |
| "for row in soup.find('table').find_all('tr'):\n", | |
| " cells = row.find_all('td')\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# append the data into the respective lists\n", | |
| "for row in soup.find('table').find_all('tr'):\n", | |
| " cells = row.find_all('td')\n", | |
| " if(len(cells) > 0):\n", | |
| " postalCodeList.append(cells[0].text)\n", | |
| " boroughList.append(cells[1].text)\n", | |
| " neighborhoodList.append(cells[2].text.rstrip('\\n')) # avoid new lines in neighborhood cell" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1A</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M2A</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M3A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Parkwoods</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M4A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Victoria Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M1A Not assigned Not assigned\n", | |
| "1 M2A Not assigned Not assigned\n", | |
| "2 M3A North York Parkwoods\n", | |
| "3 M4A North York Victoria Village\n", | |
| "4 M5A Downtown Toronto Harbourfront" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# create a new DataFrame from the three lists\n", | |
| "toronto_df = pd.DataFrame({\"PostalCode\": postalCodeList,\n", | |
| " \"Borough\": boroughList,\n", | |
| " \"Neighborhood\": neighborhoodList})\n", | |
| "\n", | |
| "toronto_df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 3. Drop rows with a borough that is \"Not assigned\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M3A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Parkwoods</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M4A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Victoria Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M6A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Lawrence Heights</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M6A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Lawrence Manor</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M3A North York Parkwoods\n", | |
| "1 M4A North York Victoria Village\n", | |
| "2 M5A Downtown Toronto Harbourfront\n", | |
| "3 M6A North York Lawrence Heights\n", | |
| "4 M6A North York Lawrence Manor" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# drop rows with a borough that is Not assigned\n", | |
| "toronto_df_dropna = toronto_df[toronto_df.Borough != \"Not assigned\"].reset_index(drop=True)\n", | |
| "toronto_df_dropna.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 4. Group neighborhoods that share the same postal code\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge, Malvern</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Highland Creek, Rouge Hill, Port Union</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Guildwood, Morningside, West Hill</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Woburn</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Cedarbrae</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M1B Scarborough Rouge, Malvern\n", | |
| "1 M1C Scarborough Highland Creek, Rouge Hill, Port Union\n", | |
| "2 M1E Scarborough Guildwood, Morningside, West Hill\n", | |
| "3 M1G Scarborough Woburn\n", | |
| "4 M1H Scarborough Cedarbrae" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# combine neighborhoods that share the same postal code (and borough) into one comma-separated row\n", | |
| "toronto_df_grouped = toronto_df_dropna.groupby([\"PostalCode\", \"Borough\"], as_index=False).agg(lambda x: \", \".join(x))\n", | |
| "toronto_df_grouped.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 5. For Neighborhood=\"Not assigned\", make the value the same as Borough" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge, Malvern</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Highland Creek, Rouge Hill, Port Union</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Guildwood, Morningside, West Hill</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Woburn</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Cedarbrae</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M1B Scarborough Rouge, Malvern\n", | |
| "1 M1C Scarborough Highland Creek, Rouge Hill, Port Union\n", | |
| "2 M1E Scarborough Guildwood, Morningside, West Hill\n", | |
| "3 M1G Scarborough Woburn\n", | |
| "4 M1H Scarborough Cedarbrae" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# for Neighborhood=\"Not assigned\", make the value the same as Borough\n", | |
| "for index, row in toronto_df_grouped.iterrows():\n", | |
| " if row[\"Neighborhood\"] == \"Not assigned\":\n", | |
| " row[\"Neighborhood\"] = row[\"Borough\"]\n", | |
| " \n", | |
| "toronto_df_grouped.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 6. Check whether it is the same as required by the question" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M5G</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Central Bay Street</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M2H</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Hillcrest Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M4B</td>\n", | |
| " <td>East York</td>\n", | |
| " <td>Woodbine Gardens, Parkview Hill</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1J</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Scarborough Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M4G</td>\n", | |
| " <td>East York</td>\n", | |
| " <td>Leaside</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>M4M</td>\n", | |
| " <td>East Toronto</td>\n", | |
| " <td>Studio District</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>M1R</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Maryvale, Wexford</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>M9V</td>\n", | |
| " <td>Etobicoke</td>\n", | |
| " <td>Albion Gardens, Beaumond Heights, Humbergate, ...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>M9L</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Humber Summit</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>M5V</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>CN Tower, Bathurst Quay, Island airport, Harbo...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge, Malvern</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough \\\n", | |
| "0 M5G Downtown Toronto \n", | |
| "1 M2H North York \n", | |
| "2 M4B East York \n", | |
| "3 M1J Scarborough \n", | |
| "4 M4G East York \n", | |
| "5 M4M East Toronto \n", | |
| "6 M1R Scarborough \n", | |
| "7 M9V Etobicoke \n", | |
| "8 M9L North York \n", | |
| "9 M5V Downtown Toronto \n", | |
| "10 M1B Scarborough \n", | |
| "11 M5A Downtown Toronto \n", | |
| "\n", | |
| " Neighborhood \n", | |
| "0 Central Bay Street \n", | |
| "1 Hillcrest Village \n", | |
| "2 Woodbine Gardens, Parkview Hill \n", | |
| "3 Scarborough Village \n", | |
| "4 Leaside \n", | |
| "5 Studio District \n", | |
| "6 Maryvale, Wexford \n", | |
| "7 Albion Gardens, Beaumond Heights, Humbergate, ... \n", | |
| "8 Humber Summit \n", | |
| "9 CN Tower, Bathurst Quay, Island airport, Harbo... \n", | |
| "10 Rouge, Malvern \n", | |
| "11 Harbourfront " | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# create a new test dataframe\n", | |
| "column_names = [\"PostalCode\", \"Borough\", \"Neighborhood\"]\n", | |
| "test_df = pd.DataFrame(columns=column_names)\n", | |
| "\n", | |
| "test_list = [\"M5G\", \"M2H\", \"M4B\", \"M1J\", \"M4G\", \"M4M\", \"M1R\", \"M9V\", \"M9L\", \"M5V\", \"M1B\", \"M5A\"]\n", | |
| "\n", | |
| "for postcode in test_list:\n", | |
| " test_df = test_df.append(toronto_df_grouped[toronto_df_grouped[\"PostalCode\"]==postcode], ignore_index=True)\n", | |
| " \n", | |
| "test_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 7. Print the number of rows of the cleaned dataframe" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(103, 3)" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# print the number of rows of the cleaned dataframe\n", | |
| "toronto_df_grouped.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 8. Load the coordinates from the csv file on Coursera" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Postal Code</th>\n", | |
| " <th>Latitude</th>\n", | |
| " <th>Longitude</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>43.806686</td>\n", | |
| " <td>-79.194353</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>43.784535</td>\n", | |
| " <td>-79.160497</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>43.763573</td>\n", | |
| " <td>-79.188711</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>43.770992</td>\n", | |
| " <td>-79.216917</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>43.773136</td>\n", | |
| " <td>-79.239476</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Postal Code Latitude Longitude\n", | |
| "0 M1B 43.806686 -79.194353\n", | |
| "1 M1C 43.784535 -79.160497\n", | |
| "2 M1E 43.763573 -79.188711\n", | |
| "3 M1G 43.770992 -79.216917\n", | |
| "4 M1H 43.773136 -79.239476" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# load the coordinates from the csv file on Coursera\n", | |
| "coordinates = pd.read_csv(\"Geospatial_Coordinates.csv\")\n", | |
| "coordinates.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Latitude</th>\n", | |
| " <th>Longitude</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>43.806686</td>\n", | |
| " <td>-79.194353</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>43.784535</td>\n", | |
| " <td>-79.160497</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>43.763573</td>\n", | |
| " <td>-79.188711</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>43.770992</td>\n", | |
| " <td>-79.216917</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>43.773136</td>\n", | |
| " <td>-79.239476</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Latitude Longitude\n", | |
| "0 M1B 43.806686 -79.194353\n", | |
| "1 M1C 43.784535 -79.160497\n", | |
| "2 M1E 43.763573 -79.188711\n", | |
| "3 M1G 43.770992 -79.216917\n", | |
| "4 M1H 43.773136 -79.239476" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# rename the column \"Postal Code\" to \"PostalCode\"\n", | |
| "coordinates.rename(columns={\"Postal Code\": \"PostalCode\"}, inplace=True)\n", | |
| "coordinates.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 9. Merge two tables to get the coordinates" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " <th>Latitude</th>\n", | |
| " <th>Longitude</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge, Malvern</td>\n", | |
| " <td>43.806686</td>\n", | |
| " <td>-79.194353</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Highland Creek, Rouge Hill, Port Union</td>\n", | |
| " <td>43.784535</td>\n", | |
| " <td>-79.160497</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Guildwood, Morningside, West Hill</td>\n", | |
| " <td>43.763573</td>\n", | |
| " <td>-79.188711</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Woburn</td>\n", | |
| " <td>43.770992</td>\n", | |
| " <td>-79.216917</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Cedarbrae</td>\n", | |
| " <td>43.773136</td>\n", | |
| " <td>-79.239476</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood Latitude \\\n", | |
| "0 M1B Scarborough Rouge, Malvern 43.806686 \n", | |
| "1 M1C Scarborough Highland Creek, Rouge Hill, Port Union 43.784535 \n", | |
| "2 M1E Scarborough Guildwood, Morningside, West Hill 43.763573 \n", | |
| "3 M1G Scarborough Woburn 43.770992 \n", | |
| "4 M1H Scarborough Cedarbrae 43.773136 \n", | |
| "\n", | |
| " Longitude \n", | |
| "0 -79.194353 \n", | |
| "1 -79.160497 \n", | |
| "2 -79.188711 \n", | |
| "3 -79.216917 \n", | |
| "4 -79.239476 " | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# merge the two tables on the column \"PostalCode\"\n", | |
| "toronto_df_new = toronto_df_grouped.merge(coordinates, on=\"PostalCode\", how=\"left\")\n", | |
| "toronto_df_new.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 10. Finally, check to make sure the coordinates are added as required by the question" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " <th>Latitude</th>\n", | |
| " <th>Longitude</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M5G</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Central Bay Street</td>\n", | |
| " <td>43.657952</td>\n", | |
| " <td>-79.387383</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M2H</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Hillcrest Village</td>\n", | |
| " <td>43.803762</td>\n", | |
| " <td>-79.363452</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M4B</td>\n", | |
| " <td>East York</td>\n", | |
| " <td>Woodbine Gardens, Parkview Hill</td>\n", | |
| " <td>43.706397</td>\n", | |
| " <td>-79.309937</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1J</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Scarborough Village</td>\n", | |
| " <td>43.744734</td>\n", | |
| " <td>-79.239476</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M4G</td>\n", | |
| " <td>East York</td>\n", | |
| " <td>Leaside</td>\n", | |
| " <td>43.709060</td>\n", | |
| " <td>-79.363452</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>M4M</td>\n", | |
| " <td>East Toronto</td>\n", | |
| " <td>Studio District</td>\n", | |
| " <td>43.659526</td>\n", | |
| " <td>-79.340923</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>M1R</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Maryvale, Wexford</td>\n", | |
| " <td>43.750072</td>\n", | |
| " <td>-79.295849</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>M9V</td>\n", | |
| " <td>Etobicoke</td>\n", | |
| " <td>Albion Gardens, Beaumond Heights, Humbergate, ...</td>\n", | |
| " <td>43.739416</td>\n", | |
| " <td>-79.588437</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>M9L</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Humber Summit</td>\n", | |
| " <td>43.756303</td>\n", | |
| " <td>-79.565963</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>M5V</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>CN Tower, Bathurst Quay, Island airport, Harbo...</td>\n", | |
| " <td>43.628947</td>\n", | |
| " <td>-79.394420</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge, Malvern</td>\n", | |
| " <td>43.806686</td>\n", | |
| " <td>-79.194353</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " <td>43.654260</td>\n", | |
| " <td>-79.360636</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough \\\n", | |
| "0 M5G Downtown Toronto \n", | |
| "1 M2H North York \n", | |
| "2 M4B East York \n", | |
| "3 M1J Scarborough \n", | |
| "4 M4G East York \n", | |
| "5 M4M East Toronto \n", | |
| "6 M1R Scarborough \n", | |
| "7 M9V Etobicoke \n", | |
| "8 M9L North York \n", | |
| "9 M5V Downtown Toronto \n", | |
| "10 M1B Scarborough \n", | |
| "11 M5A Downtown Toronto \n", | |
| "\n", | |
| " Neighborhood Latitude Longitude \n", | |
| "0 Central Bay Street 43.657952 -79.387383 \n", | |
| "1 Hillcrest Village 43.803762 -79.363452 \n", | |
| "2 Woodbine Gardens, Parkview Hill 43.706397 -79.309937 \n", | |
| "3 Scarborough Village 43.744734 -79.239476 \n", | |
| "4 Leaside 43.709060 -79.363452 \n", | |
| "5 Studio District 43.659526 -79.340923 \n", | |
| "6 Maryvale, Wexford 43.750072 -79.295849 \n", | |
| "7 Albion Gardens, Beaumond Heights, Humbergate, ... 43.739416 -79.588437 \n", | |
| "8 Humber Summit 43.756303 -79.565963 \n", | |
| "9 CN Tower, Bathurst Quay, Island airport, Harbo... 43.628947 -79.394420 \n", | |
| "10 Rouge, Malvern 43.806686 -79.194353 \n", | |
| "11 Harbourfront 43.654260 -79.360636 " | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# create a new test dataframe\n", | |
| "column_names = [\"PostalCode\", \"Borough\", \"Neighborhood\", \"Latitude\", \"Longitude\"]\n", | |
| "test_df = pd.DataFrame(columns=column_names)\n", | |
| "\n", | |
| "test_list = [\"M5G\", \"M2H\", \"M4B\", \"M1J\", \"M4G\", \"M4M\", \"M1R\", \"M9V\", \"M9L\", \"M5V\", \"M1B\", \"M5A\"]\n", | |
| "\n", | |
| "for postcode in test_list:\n", | |
| " test_df = test_df.append(toronto_df_new[toronto_df_new[\"PostalCode\"]==postcode], ignore_index=True)\n", | |
| " \n", | |
| "test_df" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment