Created
December 11, 2015 06:09
-
-
Save myui/b959716c088e8018c99c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#Necessary imports\n", | |
"import pandas as pd\n", | |
"import pandas_td as td\n", | |
"import tdclient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Set up your connection and querying engines\n", | |
"# Please enter your Master API Key in the space provided\n", | |
"# Please enter the name of the database you will be using throughout the exercise where indicated\n", | |
"apikey = '<PUT MASTER API KEY HERE>'\n", | |
"endpoint='https://api.treasuredata.com'\n", | |
"con = td.connect(apikey=apikey, endpoint=endpoint)\n", | |
"engine_presto = con.query_engine(database='<ENTER DATABASE>', type='presto')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the table in the database for the iris dataset\n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<ENTER DATABASE>')\n", | |
" db.create_log_table('iris')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>class</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.9</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.4</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.3</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>4.4</td>\n", | |
" <td>2.9</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.1</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.7</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.1</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>4.3</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.1</td>\n", | |
" <td>0.1</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>5.8</td>\n", | |
" <td>4.0</td>\n", | |
" <td>1.2</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>5.7</td>\n", | |
" <td>4.4</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.4</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.9</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.4</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.3</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>5.7</td>\n", | |
" <td>3.8</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.3</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.8</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.3</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.7</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.4</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.3</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.5</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.9</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.4</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>5.2</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>5.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.2</td>\n", | |
" <td>Iris-setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>120</th>\n", | |
" <td>6.9</td>\n", | |
" <td>3.2</td>\n", | |
" <td>5.7</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>121</th>\n", | |
" <td>5.6</td>\n", | |
" <td>2.8</td>\n", | |
" <td>4.9</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>122</th>\n", | |
" <td>7.7</td>\n", | |
" <td>2.8</td>\n", | |
" <td>6.7</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>123</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.7</td>\n", | |
" <td>4.9</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>124</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.3</td>\n", | |
" <td>5.7</td>\n", | |
" <td>2.1</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>125</th>\n", | |
" <td>7.2</td>\n", | |
" <td>3.2</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>126</th>\n", | |
" <td>6.2</td>\n", | |
" <td>2.8</td>\n", | |
" <td>4.8</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>127</th>\n", | |
" <td>6.1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>4.9</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>128</th>\n", | |
" <td>6.4</td>\n", | |
" <td>2.8</td>\n", | |
" <td>5.6</td>\n", | |
" <td>2.1</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>129</th>\n", | |
" <td>7.2</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.8</td>\n", | |
" <td>1.6</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>130</th>\n", | |
" <td>7.4</td>\n", | |
" <td>2.8</td>\n", | |
" <td>6.1</td>\n", | |
" <td>1.9</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>131</th>\n", | |
" <td>7.9</td>\n", | |
" <td>3.8</td>\n", | |
" <td>6.4</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>132</th>\n", | |
" <td>6.4</td>\n", | |
" <td>2.8</td>\n", | |
" <td>5.6</td>\n", | |
" <td>2.2</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>133</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.8</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>134</th>\n", | |
" <td>6.1</td>\n", | |
" <td>2.6</td>\n", | |
" <td>5.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>135</th>\n", | |
" <td>7.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>6.1</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>136</th>\n", | |
" <td>6.3</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.6</td>\n", | |
" <td>2.4</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>137</th>\n", | |
" <td>6.4</td>\n", | |
" <td>3.1</td>\n", | |
" <td>5.5</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>138</th>\n", | |
" <td>6.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>4.8</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>139</th>\n", | |
" <td>6.9</td>\n", | |
" <td>3.1</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.1</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>140</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.1</td>\n", | |
" <td>5.6</td>\n", | |
" <td>2.4</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>141</th>\n", | |
" <td>6.9</td>\n", | |
" <td>3.1</td>\n", | |
" <td>5.1</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>142</th>\n", | |
" <td>5.8</td>\n", | |
" <td>2.7</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.9</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>143</th>\n", | |
" <td>6.8</td>\n", | |
" <td>3.2</td>\n", | |
" <td>5.9</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>144</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.3</td>\n", | |
" <td>5.7</td>\n", | |
" <td>2.5</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width class\n", | |
"0 5.1 3.5 1.4 0.2 Iris-setosa\n", | |
"1 4.9 3.0 1.4 0.2 Iris-setosa\n", | |
"2 4.7 3.2 1.3 0.2 Iris-setosa\n", | |
"3 4.6 3.1 1.5 0.2 Iris-setosa\n", | |
"4 5.0 3.6 1.4 0.2 Iris-setosa\n", | |
"5 5.4 3.9 1.7 0.4 Iris-setosa\n", | |
"6 4.6 3.4 1.4 0.3 Iris-setosa\n", | |
"7 5.0 3.4 1.5 0.2 Iris-setosa\n", | |
"8 4.4 2.9 1.4 0.2 Iris-setosa\n", | |
"9 4.9 3.1 1.5 0.1 Iris-setosa\n", | |
"10 5.4 3.7 1.5 0.2 Iris-setosa\n", | |
"11 4.8 3.4 1.6 0.2 Iris-setosa\n", | |
"12 4.8 3.0 1.4 0.1 Iris-setosa\n", | |
"13 4.3 3.0 1.1 0.1 Iris-setosa\n", | |
"14 5.8 4.0 1.2 0.2 Iris-setosa\n", | |
"15 5.7 4.4 1.5 0.4 Iris-setosa\n", | |
"16 5.4 3.9 1.3 0.4 Iris-setosa\n", | |
"17 5.1 3.5 1.4 0.3 Iris-setosa\n", | |
"18 5.7 3.8 1.7 0.3 Iris-setosa\n", | |
"19 5.1 3.8 1.5 0.3 Iris-setosa\n", | |
"20 5.4 3.4 1.7 0.2 Iris-setosa\n", | |
"21 5.1 3.7 1.5 0.4 Iris-setosa\n", | |
"22 4.6 3.6 1.0 0.2 Iris-setosa\n", | |
"23 5.1 3.3 1.7 0.5 Iris-setosa\n", | |
"24 4.8 3.4 1.9 0.2 Iris-setosa\n", | |
"25 5.0 3.0 1.6 0.2 Iris-setosa\n", | |
"26 5.0 3.4 1.6 0.4 Iris-setosa\n", | |
"27 5.2 3.5 1.5 0.2 Iris-setosa\n", | |
"28 5.2 3.4 1.4 0.2 Iris-setosa\n", | |
"29 4.7 3.2 1.6 0.2 Iris-setosa\n", | |
".. ... ... ... ... ...\n", | |
"120 6.9 3.2 5.7 2.3 Iris-virginica\n", | |
"121 5.6 2.8 4.9 2.0 Iris-virginica\n", | |
"122 7.7 2.8 6.7 2.0 Iris-virginica\n", | |
"123 6.3 2.7 4.9 1.8 Iris-virginica\n", | |
"124 6.7 3.3 5.7 2.1 Iris-virginica\n", | |
"125 7.2 3.2 6.0 1.8 Iris-virginica\n", | |
"126 6.2 2.8 4.8 1.8 Iris-virginica\n", | |
"127 6.1 3.0 4.9 1.8 Iris-virginica\n", | |
"128 6.4 2.8 5.6 2.1 Iris-virginica\n", | |
"129 7.2 3.0 5.8 1.6 Iris-virginica\n", | |
"130 7.4 2.8 6.1 1.9 Iris-virginica\n", | |
"131 7.9 3.8 6.4 2.0 Iris-virginica\n", | |
"132 6.4 2.8 5.6 2.2 Iris-virginica\n", | |
"133 6.3 2.8 5.1 1.5 Iris-virginica\n", | |
"134 6.1 2.6 5.6 1.4 Iris-virginica\n", | |
"135 7.7 3.0 6.1 2.3 Iris-virginica\n", | |
"136 6.3 3.4 5.6 2.4 Iris-virginica\n", | |
"137 6.4 3.1 5.5 1.8 Iris-virginica\n", | |
"138 6.0 3.0 4.8 1.8 Iris-virginica\n", | |
"139 6.9 3.1 5.4 2.1 Iris-virginica\n", | |
"140 6.7 3.1 5.6 2.4 Iris-virginica\n", | |
"141 6.9 3.1 5.1 2.3 Iris-virginica\n", | |
"142 5.8 2.7 5.1 1.9 Iris-virginica\n", | |
"143 6.8 3.2 5.9 2.3 Iris-virginica\n", | |
"144 6.7 3.3 5.7 2.5 Iris-virginica\n", | |
"145 6.7 3.0 5.2 2.3 Iris-virginica\n", | |
"146 6.3 2.5 5.0 1.9 Iris-virginica\n", | |
"147 6.5 3.0 5.2 2.0 Iris-virginica\n", | |
"148 6.2 3.4 5.4 2.3 Iris-virginica\n", | |
"149 5.9 3.0 5.1 1.8 Iris-virginica\n", | |
"\n", | |
"[150 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Load the original iris dataset into pandas\n", | |
"# Please enter the filepath to where you saved the Iris data file, must be in csv format\n", | |
"df = pd.read_csv('<PATH TO IRIS CSV FILE>', header=0)\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the dataset into Treasure Data, if the dataset exists in the given database, then replace the dataset\n", | |
"# Please enter the name of the database in the space provided \n", | |
"td.to_td(df, \"<database>.iris\", con, if_exists='replace', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the pandas_td extension for ease of querying\n", | |
"%load_ext pandas_td.ipython" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"INFO: import train_normalize\n", | |
"INFO: import ratings\n", | |
"INFO: import iris\n", | |
"INFO: import ratings_testing\n", | |
"INFO: import sgd_predict_f20\n", | |
"INFO: import ratings_2\n", | |
"INFO: import train\n", | |
"INFO: import ratings_training\n", | |
"INFO: import hundred_balls\n", | |
"INFO: import sgd_model_f20\n", | |
"INFO: import test_in_functionality\n" | |
] | |
} | |
], | |
"source": [ | |
"# Set the database for this exercise\n", | |
"# Please enter the name of the database in the space provided \n", | |
"%td_use <Enter Database>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create a table to place the label mapping data\n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<Enter Database>')\n", | |
" db.create_log_table(\"iris_label_mapping\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T23:47:09Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957757\" target=\"_blank\">https://console.treasuredata.com/jobs/38957757</a><br>\n", | |
"Result size: 20 bytes<br>\n", | |
"Download: 20 / 20 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:48:05Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" <th>label</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [class, label]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" INSERT OVERWRITE TABLE iris_label_mapping \n", | |
" select\n", | |
" class,\n", | |
" rank - 1 as label\n", | |
" from (\n", | |
" select\n", | |
" distinct class,\n", | |
" dense_rank() over (order by class) as rank\n", | |
" from \n", | |
" iris\n", | |
" ) t;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the table for the training dataset\n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<Enter Database>')\n", | |
" db.create_log_table(\"iris_training\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T23:35:44Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957027\" target=\"_blank\">https://console.treasuredata.com/jobs/38957027</a><br>\n", | |
"Result size: 20 bytes<br>\n", | |
"Download: 20 / 20 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:36:43Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>rowid</th>\n", | |
" <th>features</th>\n", | |
" <th>label</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [rowid, features, label]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" INSERT OVERWRITE TABLE iris_training\n", | |
" select\n", | |
" rowid() as rowid,\n", | |
" array(t1.sepal_length, t1.sepal_width, t1.petal_length, t1.petal_width) as features,\n", | |
" t2.label\n", | |
" from\n", | |
" iris t1\n", | |
" JOIN iris_label_mapping t2 ON (t1.class = t2.class);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the table for the model \n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<Enter Database>')\n", | |
" db.create_log_table(\"iris_model\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T22:55:17Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38953305\" target=\"_blank\">https://console.treasuredata.com/jobs/38953305</a><br>\n", | |
"Result size: 20 bytes<br>\n", | |
"Download: 20 / 20 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T22:55:57Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>model</th>\n", | |
" <th>importance</th>\n", | |
" <th>errors</th>\n", | |
" <th>tests</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [model, importance, errors, tests]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" INSERT OVERWRITE TABLE iris_model\n", | |
" select \n", | |
" train_randomforest_classifier(features, label, '-trees 50') \n", | |
" as (model, importance, errors, tests)\n", | |
" from\n", | |
" iris_training;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>var_importance</th>\n", | |
" <th>oob_err_rate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[15.961039017624243, 10.467155497972497, 31.44...</td>\n", | |
" <td>0.046667</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" var_importance oob_err_rate\n", | |
"0 [15.961039017624243, 10.467155497972497, 31.44... 0.046667" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" select\n", | |
" array_sum(importance) as var_importance,\n", | |
" sum(errors) / sum(tests) as oob_err_rate\n", | |
" from\n", | |
" iris_model;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the table for a human readable model \n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<Enter Database>')\n", | |
" db.create_log_table(\"iris_model_javascript\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T23:00:20Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38953709\" target=\"_blank\">https://console.treasuredata.com/jobs/38953709</a><br>\n", | |
"Result size: 20 bytes<br>\n", | |
"Download: 20 / 20 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:01:16Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>model</th>\n", | |
" <th>importance</th>\n", | |
" <th>errors</th>\n", | |
" <th>tests</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [model, importance, errors, tests]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" INSERT OVERWRITE TABLE iris_model_javascript\n", | |
" select \n", | |
" train_randomforest_classifier(features, label, '-trees 50 -output_type js') \n", | |
" as (model, importance, errors, tests)\n", | |
" from\n", | |
" iris_training;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T23:41:44Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957433\" target=\"_blank\">https://console.treasuredata.com/jobs/38957433</a><br>\n", | |
"<div style=\"color: #888;\"># started at 2015-11-09T23:41:46Z</div><pre style=\"color: #c44;\">\n", | |
"** WARNING: time index filtering is not set on \n", | |
"** This query could be very slow as a result.\n", | |
"** Please see http://docs.treasure-data.com/articles/presto-performance-tuning#leveraging-time-based-partitioning</pre>\n", | |
"<pre>2015-11-09 23:41:46: rows pending running done / total\n", | |
" Stage-0: 1 0 0 1 / 1\n", | |
" Stage-1: 50 0 0 1 / 1</pre>Result size: 172 bytes<br>\n", | |
"Download: 172 / 172 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:41:49Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"if(x[2] <= 2.45) {\n", | |
" 0;\n", | |
"} else {\n", | |
" if(x[0] <= 5.75) {\n", | |
" if(x[3] <= 1.6) {\n", | |
" 1;\n", | |
" } else {\n", | |
" 2;\n", | |
" }\n", | |
" } else {\n", | |
" if(x[0] <= 7.05) {\n", | |
" if(x[2] <= 4.95) {\n", | |
" if(x[2] <= 4.75) {\n", | |
" 1;\n", | |
" } else {\n", | |
" if(x[0] <= 6.25) {\n", | |
" 2;\n", | |
" } else {\n", | |
" 1;\n", | |
" }\n", | |
" }\n", | |
" } else {\n", | |
" if(x[3] <= 1.7000000000000002) {\n", | |
" if(x[1] <= 2.6500000000000004) {\n", | |
" 2;\n", | |
" } else {\n", | |
" 1;\n", | |
" }\n", | |
" } else {\n", | |
" 2;\n", | |
" }\n", | |
" }\n", | |
" } else {\n", | |
" 2;\n", | |
" }\n", | |
" }\n", | |
"}\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Save the model as a DataFrame and print the model\n", | |
"model = td.read_td(\"\"\"select model from iris_model_javascript limit 1\"\"\", engine_presto)\n", | |
"print(model.model[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the table for the prediction datapoints\n", | |
"# Please enter the name of the database in the space provided \n", | |
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n", | |
" db = client.database('<Enter Database>')\n", | |
" db.create_log_table(\"iris_predicted_vm\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"border-style: dashed; border-width: 1px;\">\n", | |
"<div style=\"color: #888;\"># issued at 2015-11-09T23:12:53Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38954892\" target=\"_blank\">https://console.treasuredata.com/jobs/38954892</a><br>\n", | |
"Result size: 20 bytes<br>\n", | |
"Download: 20 / 20 bytes (100.00%)<br>\n", | |
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:13:58Z</div></div>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>rowid</th>\n", | |
" <th>label</th>\n", | |
" <th>probability</th>\n", | |
" <th>probabilities</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [rowid, label, probability, probabilities]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
"WITH t1 as (\n", | |
" SELECT\n", | |
" rowid,\n", | |
" rf_ensemble(predicted) as predicted\n", | |
" FROM (\n", | |
" SELECT\n", | |
" t.rowid, \n", | |
" tree_predict(p.model, t.features, true) as predicted\n", | |
" FROM\n", | |
" iris_training t\n", | |
" CROSS JOIN iris_model p\n", | |
" ) t1\n", | |
" group by\n", | |
" rowid\n", | |
" )\n", | |
" INSERT OVERWRITE TABLE iris_predicted_vm\n", | |
" SELECT\n", | |
" rowid,\n", | |
" predicted.label, predicted.probability, predicted.probabilities\n", | |
" FROM\n", | |
" t1;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>number_rows</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>150</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" number_rows\n", | |
"0 150" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_presto\n", | |
" select count(1) as number_rows from iris_training;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>percentage_actual_over_predicted</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" percentage_actual_over_predicted\n", | |
"0 100" | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%td_hive\n", | |
" WITH t1 as (\n", | |
" SELECT\n", | |
" t.rowid,\n", | |
" t.label as actual,\n", | |
" p.label as predicted\n", | |
" FROM\n", | |
" iris_predicted_vm p\n", | |
" LEFT OUTER JOIN iris_training t ON (t.rowid = p.rowid)\n", | |
" )\n", | |
" SELECT\n", | |
" (count(1) / 150.0) * 100 as percentage_actual_over_predicted\n", | |
" FROM\n", | |
" t1\n", | |
" WHERE\n", | |
" actual = predicted;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment