Skip to content

Instantly share code, notes, and snippets.

@FavioVazquez
Created July 19, 2019 19:30
Show Gist options
  • Save FavioVazquez/1700b1a941b17532fa6bf09e9dfd8e3e to your computer and use it in GitHub Desktop.
Save FavioVazquez/1700b1a941b17532fa6bf09e9dfd8e3e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style> /* Tables*/\n",
"\n",
" .data_type {\n",
" font-size: 0.8em;\n",
" font-weight: normal;\n",
" }\n",
"\n",
" .column_name {\n",
" font-size: 1.2em;\n",
" }\n",
"\n",
" .info_items {\n",
" margin: 10px 0;\n",
" font-size: 0.8em;\n",
" }\n",
"\n",
" .optimus_table td {\n",
" border: 0px;\n",
" }\n",
"\n",
" .optimus_table tr:nth-child(even) {\n",
" background-color: #f2f2f2 !important;\n",
" }\n",
"\n",
" .optimus_table tr:nth-child(odd) {\n",
" background-color: #ffffff !important;\n",
" }\n",
"\n",
" .optimus_table thead {\n",
" border-bottom: 1px solid black;\n",
" }\n",
" .optimus_table{\n",
" font-size: 12px;\n",
" }\n",
"\n",
" .optimus_table tbody{\n",
" font-family: monospace;\n",
" border-bottom: 1px solid #cccccc;\n",
" }\n",
"\n",
" /* Profiler */\n",
" .main{\n",
" width:100%;\n",
" overflow:auto;\n",
" border-bottom:1px solid #eeeeee;\n",
" padding: 10px 0;\n",
" }\n",
" .panel_profiler{\n",
" margin-right:2%;\n",
" float:left;\n",
" padding-bottom:2%;\n",
" }\n",
" .panel_profiler tbody{\n",
" font-family:monospace;\n",
" }\n",
" .title_profiler{\n",
" padding:20px;\n",
" background-color: #eeeeee\n",
" }\n",
" .info{\n",
" overflow: auto\n",
" }\n",
" .main td, main th{\n",
" padding:0em\n",
" }\n",
" .panel_profiler td {\n",
" padding:0.2em\n",
" }\n",
" .none, .true{\n",
" color:#0000ff\n",
" }\n",
" .optimus_table th {\n",
" font-family:sans-serif;\n",
" }\n",
"\n",
" .info_items{\n",
" font-family:sans-serif;\n",
" font-size:10px;\n",
" }\n",
"</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import the necessary libraries\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from optimus import Optimus\n",
"# Import only what the notebook uses: a star import from pyspark.sql.functions\n",
"# would shadow Python builtins such as sum, min, max, abs and round.\n",
"from pyspark.sql.functions import desc\n",
"\n",
"# Optimus entry point; later cells reach the Spark session through op.spark\n",
"op = Optimus()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read the data and create Spark DF"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Tab-separated Chipotle orders file\n",
"url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n",
"\n",
"# Load with pandas, then force the three text columns to str\n",
"# (astype(str) also turns missing values into the literal string 'nan').\n",
"text_columns = ['item_name', 'choice_description', 'item_price']\n",
"chipo_pd = pd.read_csv(url, sep='\\t')\n",
"chipo_pd[text_columns] = chipo_pd[text_columns].astype(str)\n",
"\n",
"# Hand the pandas frame over to the Spark session exposed by Optimus\n",
"chipo = op.spark.createDataFrame(chipo_pd)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# See the first 10 entries"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"\n",
"\n",
"<div class=\"info_items\">Viewing 10 of 4622 rows / 5 columns</div>\n",
"<div class=\"info_items\">1 partition(s)</div>\n",
"\n",
"<table class=\"optimus_table\">\n",
" <thead>\n",
" <tr>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">order_id</div>\n",
" <div class=\"data_type\">1 (bigint)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">quantity</div>\n",
" <div class=\"data_type\">2 (bigint)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">item_name</div>\n",
" <div class=\"data_type\">3 (string)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">choice_description</div>\n",
" <div class=\"data_type\">4 (string)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">item_price</div>\n",
" <div class=\"data_type\">5 (string)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" </tr>\n",
"\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chips&#8901;and&#8901;Fresh&#8901;Tomato&#8901;Salsa'>Chips&#8901;and&#8901;Fresh&#8901;Tomato&#8901;Salsa\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='nan'>nan\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$2.39&#8901;'>$2.39&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Izze'>Izze\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Clementine]'>[Clementine]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$3.39&#8901;'>$3.39&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Nantucket&#8901;Nectar'>Nantucket&#8901;Nectar\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Apple]'>[Apple]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$3.39&#8901;'>$3.39&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chips&#8901;and&#8901;Tomatillo-Green&#8901;Chili&#8901;Salsa'>Chips&#8901;and&#8901;Tomatillo-Green&#8901;Chili&#8901;Salsa\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='nan'>nan\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$2.39&#8901;'>$2.39&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2'>2\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2'>2\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chicken&#8901;Bowl'>Chicken&#8901;Bowl\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Tomatillo-Red&#8901;Chili&#8901;Salsa&#8901;(Hot),&#8901;[Black&#8901;Beans,&#8901;Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream]]'>[Tomatillo-Red&#8901;Chili&#8901;Salsa&#8901;(Hot),&#8901;[Black&#8901;Beans,&#8901;Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream]]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$16.98&#8901;'>$16.98&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='3'>3\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chicken&#8901;Bowl'>Chicken&#8901;Bowl\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Fresh&#8901;Tomato&#8901;Salsa&#8901;(Mild),&#8901;[Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Guacamole,&#8901;Lettuce]]'>[Fresh&#8901;Tomato&#8901;Salsa&#8901;(Mild),&#8901;[Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Guacamole,&#8901;Lettuce]]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$10.98&#8901;'>$10.98&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='3'>3\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Side&#8901;of&#8901;Chips'>Side&#8901;of&#8901;Chips\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='nan'>nan\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$1.69&#8901;'>$1.69&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='4'>4\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Steak&#8901;Burrito'>Steak&#8901;Burrito\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Tomatillo&#8901;Red&#8901;Chili&#8901;Salsa,&#8901;[Fajita&#8901;Vegetables,&#8901;Black&#8901;Beans,&#8901;Pinto&#8901;Beans,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Guacamole,&#8901;Lettuce]]'>[Tomatillo&#8901;Red&#8901;Chili&#8901;Salsa,&#8901;[Fajita&#8901;Vegetables,&#8901;Black&#8901;Beans,&#8901;Pinto&#8901;Bean...\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$11.75&#8901;'>$11.75&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='4'>4\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Steak&#8901;Soft&#8901;Tacos'>Steak&#8901;Soft&#8901;Tacos\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Tomatillo&#8901;Green&#8901;Chili&#8901;Salsa,&#8901;[Pinto&#8901;Beans,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Lettuce]]'>[Tomatillo&#8901;Green&#8901;Chili&#8901;Salsa,&#8901;[Pinto&#8901;Beans,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Lettuce]]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$9.25&#8901;'>$9.25&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='5'>5\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Steak&#8901;Burrito'>Steak&#8901;Burrito\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Fresh&#8901;Tomato&#8901;Salsa,&#8901;[Rice,&#8901;Black&#8901;Beans,&#8901;Pinto&#8901;Beans,&#8901;Cheese,&#8901;Sour&#8901;Cream,&#8901;Lettuce]]'>[Fresh&#8901;Tomato&#8901;Salsa,&#8901;[Rice,&#8901;Black&#8901;Beans,&#8901;Pinto&#8901;Beans,&#8901;Cheese,&#8901;Sour&#890...\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='$9.25&#8901;'>$9.25&#8901;\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" </tbody>\n",
"</table>\n",
"\n",
"\n",
"<div class=\"info_items\">Viewing 10 of 4622 rows / 5 columns</div>\n",
"<div class=\"info_items\">1 partition(s)</div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"chipo.table(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create a bar chart of the top 5 items bought"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Aggregate once and reuse the result for both the bars and the tick labels.\n",
"# Running the query twice (once for the plot, once for the labels) doubles the\n",
"# Spark work and can pair bars with the wrong names when quantities tie.\n",
"top_items = (chipo.groupby(\"item_name\")\n",
"    .sum(\"quantity\")\n",
"    .cols.rename(\"sum(quantity)\", \"quantity\")\n",
"    .sort(desc(\"quantity\"))\n",
"    .limit(5)  # keep only the top rows before collecting them to the driver\n",
"    .toPandas())\n",
"\n",
"# Only the numeric column (quantity) is drawn; item names are applied as ticks below\n",
"top_items.plot(kind=\"bar\")\n",
"\n",
"# Size the ticks from the data so fewer than 5 distinct items cannot mismatch the labels\n",
"plt.xticks(np.arange(len(top_items)), top_items[\"item_name\"])\n",
"plt.xlabel('Items')\n",
"plt.ylabel('Quantity')  # the bars show summed quantity, not price\n",
"plt.title('Most ordered Chipotle\\'s Items')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create a scatterplot with the number of items ordered per order price"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# item_price is loaded as text such as \"$2.39 \": take the substring starting at the\n",
"# second character (SQL-style, 1-based substr) to drop the \"$\", then cast to float.\n",
"# The dtype guard keeps this cell idempotent: re-running it on the already-numeric\n",
"# column would otherwise chop the first digit off every price.\n",
"if dict(chipo.dtypes)[\"item_price\"] == \"string\":\n",
"    chipo = chipo.withColumn(\"item_price\", chipo.item_price.substr(2,10).cast(\"float\"))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"\n",
"\n",
"<div class=\"info_items\">Viewing 5 of 4622 rows / 5 columns</div>\n",
"<div class=\"info_items\">1 partition(s)</div>\n",
"\n",
"<table class=\"optimus_table\">\n",
" <thead>\n",
" <tr>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">order_id</div>\n",
" <div class=\"data_type\">1 (bigint)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">quantity</div>\n",
" <div class=\"data_type\">2 (bigint)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">item_name</div>\n",
" <div class=\"data_type\">3 (string)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">choice_description</div>\n",
" <div class=\"data_type\">4 (string)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" <th>\n",
" <div class=\"column_name\">item_price</div>\n",
" <div class=\"data_type\">5 (float)</div>\n",
" <div class=\"data_type\">\n",
" \n",
" nullable\n",
" \n",
" </div>\n",
" </th>\n",
" \n",
" </tr>\n",
"\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chips&#8901;and&#8901;Fresh&#8901;Tomato&#8901;Salsa'>Chips&#8901;and&#8901;Fresh&#8901;Tomato&#8901;Salsa\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='nan'>nan\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2.390000104904175'>2.390000104904175\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Izze'>Izze\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Clementine]'>[Clementine]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='3.390000104904175'>3.390000104904175\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Nantucket&#8901;Nectar'>Nantucket&#8901;Nectar\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Apple]'>[Apple]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='3.390000104904175'>3.390000104904175\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='1'>1\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chips&#8901;and&#8901;Tomatillo-Green&#8901;Chili&#8901;Salsa'>Chips&#8901;and&#8901;Tomatillo-Green&#8901;Chili&#8901;Salsa\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='nan'>nan\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2.390000104904175'>2.390000104904175\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" <tr>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2'>2\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='2'>2\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='Chicken&#8901;Bowl'>Chicken&#8901;Bowl\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='[Tomatillo-Red&#8901;Chili&#8901;Salsa&#8901;(Hot),&#8901;[Black&#8901;Beans,&#8901;Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream]]'>[Tomatillo-Red&#8901;Chili&#8901;Salsa&#8901;(Hot),&#8901;[Black&#8901;Beans,&#8901;Rice,&#8901;Cheese,&#8901;Sour&#8901;Cream]]\n",
" </div>\n",
" </td>\n",
" \n",
" <td>\n",
" <div class=\" \"\n",
" title='16.979999542236328'>16.979999542236328\n",
" </div>\n",
" </td>\n",
" \n",
" </tr>\n",
" \n",
" </tbody>\n",
"</table>\n",
"\n",
"\n",
"<div class=\"info_items\">Viewing 5 of 4622 rows / 5 columns</div>\n",
"<div class=\"info_items\">1 partition(s)</div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"chipo.table(5)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Sum the numeric columns per order, then restore the plain column names\n",
"# that Spark replaced with sum(...)\n",
"orders = (\n",
"    chipo.groupby('order_id')\n",
"    .sum()\n",
"    .cols.rename([\n",
"        (\"sum(quantity)\", \"quantity\"),\n",
"        (\"sum(item_price)\", \"item_price\"),\n",
"    ])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"orders.plot.scatter([\"item_price\", \"quantity\"])\n",
"\n",
"# Label the current axes in a single call\n",
"plt.gca().set(\n",
"    xlabel='Order Price',\n",
"    ylabel='Items ordered',\n",
"    title='Number of items ordered per order price',\n",
")\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment