Skip to content

Instantly share code, notes, and snippets.

@myui
Created December 11, 2015 06:09
Show Gist options
  • Save myui/b959716c088e8018c99c to your computer and use it in GitHub Desktop.
Save myui/b959716c088e8018c99c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Necessary imports\n",
"import pandas as pd\n",
"import pandas_td as td\n",
"import tdclient"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Set up your connection and querying engines\n",
"# Please enter your Master API Key in the space provided\n",
"# Please enter the name of the database you will be using throughout the exercise where indicated\n",
"apikey = '<PUT MASTER API KEY HERE>'\n",
"endpoint='https://api.treasuredata.com'\n",
"con = td.connect(apikey=apikey, endpoint=endpoint)\n",
"engine_presto = con.query_engine(database='<ENTER DATABASE>', type='presto')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the table in the database for the iris dataset\n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<ENTER DATABASE>')\n",
" db.create_log_table('iris')"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5.4</td>\n",
" <td>3.9</td>\n",
" <td>1.7</td>\n",
" <td>0.4</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>4.6</td>\n",
" <td>3.4</td>\n",
" <td>1.4</td>\n",
" <td>0.3</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>5.0</td>\n",
" <td>3.4</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>4.4</td>\n",
" <td>2.9</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>4.9</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.1</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>5.4</td>\n",
" <td>3.7</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>4.8</td>\n",
" <td>3.4</td>\n",
" <td>1.6</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>4.8</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.1</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>4.3</td>\n",
" <td>3.0</td>\n",
" <td>1.1</td>\n",
" <td>0.1</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>5.8</td>\n",
" <td>4.0</td>\n",
" <td>1.2</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>5.7</td>\n",
" <td>4.4</td>\n",
" <td>1.5</td>\n",
" <td>0.4</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>5.4</td>\n",
" <td>3.9</td>\n",
" <td>1.3</td>\n",
" <td>0.4</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.3</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>5.7</td>\n",
" <td>3.8</td>\n",
" <td>1.7</td>\n",
" <td>0.3</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>5.1</td>\n",
" <td>3.8</td>\n",
" <td>1.5</td>\n",
" <td>0.3</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>5.4</td>\n",
" <td>3.4</td>\n",
" <td>1.7</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>5.1</td>\n",
" <td>3.7</td>\n",
" <td>1.5</td>\n",
" <td>0.4</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>4.6</td>\n",
" <td>3.6</td>\n",
" <td>1.0</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>5.1</td>\n",
" <td>3.3</td>\n",
" <td>1.7</td>\n",
" <td>0.5</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>4.8</td>\n",
" <td>3.4</td>\n",
" <td>1.9</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1.6</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>5.0</td>\n",
" <td>3.4</td>\n",
" <td>1.6</td>\n",
" <td>0.4</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>5.2</td>\n",
" <td>3.5</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>5.2</td>\n",
" <td>3.4</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.6</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120</th>\n",
" <td>6.9</td>\n",
" <td>3.2</td>\n",
" <td>5.7</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>121</th>\n",
" <td>5.6</td>\n",
" <td>2.8</td>\n",
" <td>4.9</td>\n",
" <td>2.0</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>122</th>\n",
" <td>7.7</td>\n",
" <td>2.8</td>\n",
" <td>6.7</td>\n",
" <td>2.0</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>6.3</td>\n",
" <td>2.7</td>\n",
" <td>4.9</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>6.7</td>\n",
" <td>3.3</td>\n",
" <td>5.7</td>\n",
" <td>2.1</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>7.2</td>\n",
" <td>3.2</td>\n",
" <td>6.0</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>6.2</td>\n",
" <td>2.8</td>\n",
" <td>4.8</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>6.1</td>\n",
" <td>3.0</td>\n",
" <td>4.9</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>6.4</td>\n",
" <td>2.8</td>\n",
" <td>5.6</td>\n",
" <td>2.1</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>7.2</td>\n",
" <td>3.0</td>\n",
" <td>5.8</td>\n",
" <td>1.6</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>7.4</td>\n",
" <td>2.8</td>\n",
" <td>6.1</td>\n",
" <td>1.9</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>7.9</td>\n",
" <td>3.8</td>\n",
" <td>6.4</td>\n",
" <td>2.0</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>6.4</td>\n",
" <td>2.8</td>\n",
" <td>5.6</td>\n",
" <td>2.2</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>6.3</td>\n",
" <td>2.8</td>\n",
" <td>5.1</td>\n",
" <td>1.5</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>6.1</td>\n",
" <td>2.6</td>\n",
" <td>5.6</td>\n",
" <td>1.4</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>7.7</td>\n",
" <td>3.0</td>\n",
" <td>6.1</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>6.3</td>\n",
" <td>3.4</td>\n",
" <td>5.6</td>\n",
" <td>2.4</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>6.4</td>\n",
" <td>3.1</td>\n",
" <td>5.5</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>4.8</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>6.9</td>\n",
" <td>3.1</td>\n",
" <td>5.4</td>\n",
" <td>2.1</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>6.7</td>\n",
" <td>3.1</td>\n",
" <td>5.6</td>\n",
" <td>2.4</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141</th>\n",
" <td>6.9</td>\n",
" <td>3.1</td>\n",
" <td>5.1</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>5.8</td>\n",
" <td>2.7</td>\n",
" <td>5.1</td>\n",
" <td>1.9</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>6.8</td>\n",
" <td>3.2</td>\n",
" <td>5.9</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>6.7</td>\n",
" <td>3.3</td>\n",
" <td>5.7</td>\n",
" <td>2.5</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>6.7</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6.3</td>\n",
" <td>2.5</td>\n",
" <td>5.0</td>\n",
" <td>1.9</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.0</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6.2</td>\n",
" <td>3.4</td>\n",
" <td>5.4</td>\n",
" <td>2.3</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>5.9</td>\n",
" <td>3.0</td>\n",
" <td>5.1</td>\n",
" <td>1.8</td>\n",
" <td>Iris-virginica</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>150 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width class\n",
"0 5.1 3.5 1.4 0.2 Iris-setosa\n",
"1 4.9 3.0 1.4 0.2 Iris-setosa\n",
"2 4.7 3.2 1.3 0.2 Iris-setosa\n",
"3 4.6 3.1 1.5 0.2 Iris-setosa\n",
"4 5.0 3.6 1.4 0.2 Iris-setosa\n",
"5 5.4 3.9 1.7 0.4 Iris-setosa\n",
"6 4.6 3.4 1.4 0.3 Iris-setosa\n",
"7 5.0 3.4 1.5 0.2 Iris-setosa\n",
"8 4.4 2.9 1.4 0.2 Iris-setosa\n",
"9 4.9 3.1 1.5 0.1 Iris-setosa\n",
"10 5.4 3.7 1.5 0.2 Iris-setosa\n",
"11 4.8 3.4 1.6 0.2 Iris-setosa\n",
"12 4.8 3.0 1.4 0.1 Iris-setosa\n",
"13 4.3 3.0 1.1 0.1 Iris-setosa\n",
"14 5.8 4.0 1.2 0.2 Iris-setosa\n",
"15 5.7 4.4 1.5 0.4 Iris-setosa\n",
"16 5.4 3.9 1.3 0.4 Iris-setosa\n",
"17 5.1 3.5 1.4 0.3 Iris-setosa\n",
"18 5.7 3.8 1.7 0.3 Iris-setosa\n",
"19 5.1 3.8 1.5 0.3 Iris-setosa\n",
"20 5.4 3.4 1.7 0.2 Iris-setosa\n",
"21 5.1 3.7 1.5 0.4 Iris-setosa\n",
"22 4.6 3.6 1.0 0.2 Iris-setosa\n",
"23 5.1 3.3 1.7 0.5 Iris-setosa\n",
"24 4.8 3.4 1.9 0.2 Iris-setosa\n",
"25 5.0 3.0 1.6 0.2 Iris-setosa\n",
"26 5.0 3.4 1.6 0.4 Iris-setosa\n",
"27 5.2 3.5 1.5 0.2 Iris-setosa\n",
"28 5.2 3.4 1.4 0.2 Iris-setosa\n",
"29 4.7 3.2 1.6 0.2 Iris-setosa\n",
".. ... ... ... ... ...\n",
"120 6.9 3.2 5.7 2.3 Iris-virginica\n",
"121 5.6 2.8 4.9 2.0 Iris-virginica\n",
"122 7.7 2.8 6.7 2.0 Iris-virginica\n",
"123 6.3 2.7 4.9 1.8 Iris-virginica\n",
"124 6.7 3.3 5.7 2.1 Iris-virginica\n",
"125 7.2 3.2 6.0 1.8 Iris-virginica\n",
"126 6.2 2.8 4.8 1.8 Iris-virginica\n",
"127 6.1 3.0 4.9 1.8 Iris-virginica\n",
"128 6.4 2.8 5.6 2.1 Iris-virginica\n",
"129 7.2 3.0 5.8 1.6 Iris-virginica\n",
"130 7.4 2.8 6.1 1.9 Iris-virginica\n",
"131 7.9 3.8 6.4 2.0 Iris-virginica\n",
"132 6.4 2.8 5.6 2.2 Iris-virginica\n",
"133 6.3 2.8 5.1 1.5 Iris-virginica\n",
"134 6.1 2.6 5.6 1.4 Iris-virginica\n",
"135 7.7 3.0 6.1 2.3 Iris-virginica\n",
"136 6.3 3.4 5.6 2.4 Iris-virginica\n",
"137 6.4 3.1 5.5 1.8 Iris-virginica\n",
"138 6.0 3.0 4.8 1.8 Iris-virginica\n",
"139 6.9 3.1 5.4 2.1 Iris-virginica\n",
"140 6.7 3.1 5.6 2.4 Iris-virginica\n",
"141 6.9 3.1 5.1 2.3 Iris-virginica\n",
"142 5.8 2.7 5.1 1.9 Iris-virginica\n",
"143 6.8 3.2 5.9 2.3 Iris-virginica\n",
"144 6.7 3.3 5.7 2.5 Iris-virginica\n",
"145 6.7 3.0 5.2 2.3 Iris-virginica\n",
"146 6.3 2.5 5.0 1.9 Iris-virginica\n",
"147 6.5 3.0 5.2 2.0 Iris-virginica\n",
"148 6.2 3.4 5.4 2.3 Iris-virginica\n",
"149 5.9 3.0 5.1 1.8 Iris-virginica\n",
"\n",
"[150 rows x 5 columns]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the original iris dataset into pandas\n",
"# Please enter the filepath to where you saved the Iris data file, must be in csv format\n",
"df = pd.read_csv('<PATH TO IRIS CSV FILE>', header=0)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Load the dataset into Treasure Data, if the dataset exists in the given database, then replace the dataset\n",
"# Please enter the name of the database in the space provided \n",
"td.to_td(df, \"<database>.iris\", con, if_exists='replace', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Load the pandas_td extension for ease of querying\n",
"%load_ext pandas_td.ipython"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO: import train_normalize\n",
"INFO: import ratings\n",
"INFO: import iris\n",
"INFO: import ratings_testing\n",
"INFO: import sgd_predict_f20\n",
"INFO: import ratings_2\n",
"INFO: import train\n",
"INFO: import ratings_training\n",
"INFO: import hundred_balls\n",
"INFO: import sgd_model_f20\n",
"INFO: import test_in_functionality\n"
]
}
],
"source": [
"# Set the database for this exercise\n",
"# Please enter the name of the database in the space provided \n",
"%td_use <Enter Database>"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create a table to place the label mapping data\n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<Enter Database>')\n",
" db.create_log_table(\"iris_label_mapping\")"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T23:47:09Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957757\" target=\"_blank\">https://console.treasuredata.com/jobs/38957757</a><br>\n",
"Result size: 20 bytes<br>\n",
"Download: 20 / 20 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:48:05Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>class</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [class, label]\n",
"Index: []"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" INSERT OVERWRITE TABLE iris_label_mapping \n",
" select\n",
" class,\n",
" rank - 1 as label\n",
" from (\n",
" select\n",
" distinct class,\n",
" dense_rank() over (order by class) as rank\n",
" from \n",
" iris\n",
" ) t;"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the table for the training dataset\n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<Enter Database>')\n",
" db.create_log_table(\"iris_training\")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T23:35:44Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957027\" target=\"_blank\">https://console.treasuredata.com/jobs/38957027</a><br>\n",
"Result size: 20 bytes<br>\n",
"Download: 20 / 20 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:36:43Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rowid</th>\n",
" <th>features</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [rowid, features, label]\n",
"Index: []"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" INSERT OVERWRITE TABLE iris_training\n",
" select\n",
" rowid() as rowid,\n",
" array(t1.sepal_length, t1.sepal_width, t1.petal_length, t1.petal_width) as features,\n",
" t2.label\n",
" from\n",
" iris t1\n",
" JOIN iris_label_mapping t2 ON (t1.class = t2.class);"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the table for the model \n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<Enter Database>')\n",
" db.create_log_table(\"iris_model\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T22:55:17Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38953305\" target=\"_blank\">https://console.treasuredata.com/jobs/38953305</a><br>\n",
"Result size: 20 bytes<br>\n",
"Download: 20 / 20 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T22:55:57Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" <th>importance</th>\n",
" <th>errors</th>\n",
" <th>tests</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [model, importance, errors, tests]\n",
"Index: []"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" INSERT OVERWRITE TABLE iris_model\n",
" select \n",
" train_randomforest_classifier(features, label, '-trees 50') \n",
" as (model, importance, errors, tests)\n",
" from\n",
" iris_training;"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>var_importance</th>\n",
" <th>oob_err_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[15.961039017624243, 10.467155497972497, 31.44...</td>\n",
" <td>0.046667</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" var_importance oob_err_rate\n",
"0 [15.961039017624243, 10.467155497972497, 31.44... 0.046667"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" select\n",
" array_sum(importance) as var_importance,\n",
" sum(errors) / sum(tests) as oob_err_rate\n",
" from\n",
" iris_model;"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the table for a human readable model \n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<Enter Database>')\n",
" db.create_log_table(\"iris_model_javascript\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T23:00:20Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38953709\" target=\"_blank\">https://console.treasuredata.com/jobs/38953709</a><br>\n",
"Result size: 20 bytes<br>\n",
"Download: 20 / 20 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:01:16Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" <th>importance</th>\n",
" <th>errors</th>\n",
" <th>tests</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [model, importance, errors, tests]\n",
"Index: []"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" INSERT OVERWRITE TABLE iris_model_javascript\n",
" select \n",
" train_randomforest_classifier(features, label, '-trees 50 -output_type js') \n",
" as (model, importance, errors, tests)\n",
" from\n",
" iris_training;"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T23:41:44Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38957433\" target=\"_blank\">https://console.treasuredata.com/jobs/38957433</a><br>\n",
"<div style=\"color: #888;\"># started at 2015-11-09T23:41:46Z</div><pre style=\"color: #c44;\">\n",
"** WARNING: time index filtering is not set on \n",
"** This query could be very slow as a result.\n",
"** Please see http://docs.treasure-data.com/articles/presto-performance-tuning#leveraging-time-based-partitioning</pre>\n",
"<pre>2015-11-09 23:41:46: rows pending running done / total\n",
" Stage-0: 1 0 0 1 / 1\n",
" Stage-1: 50 0 0 1 / 1</pre>Result size: 172 bytes<br>\n",
"Download: 172 / 172 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:41:49Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"if(x[2] <= 2.45) {\n",
" 0;\n",
"} else {\n",
" if(x[0] <= 5.75) {\n",
" if(x[3] <= 1.6) {\n",
" 1;\n",
" } else {\n",
" 2;\n",
" }\n",
" } else {\n",
" if(x[0] <= 7.05) {\n",
" if(x[2] <= 4.95) {\n",
" if(x[2] <= 4.75) {\n",
" 1;\n",
" } else {\n",
" if(x[0] <= 6.25) {\n",
" 2;\n",
" } else {\n",
" 1;\n",
" }\n",
" }\n",
" } else {\n",
" if(x[3] <= 1.7000000000000002) {\n",
" if(x[1] <= 2.6500000000000004) {\n",
" 2;\n",
" } else {\n",
" 1;\n",
" }\n",
" } else {\n",
" 2;\n",
" }\n",
" }\n",
" } else {\n",
" 2;\n",
" }\n",
" }\n",
"}\n",
"\n"
]
}
],
"source": [
"# Save the model as a DataFrame and print the model\n",
"model = td.read_td(\"\"\"select model from iris_model_javascript limit 1\"\"\", engine_presto)\n",
"print(model.model[0])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the table for the prediction datapoints\n",
"# Please enter the name of the database in the space provided \n",
"with tdclient.Client(apikey=apikey,endpoint=endpoint) as client:\n",
" db = client.database('<Enter Database>')\n",
" db.create_log_table(\"iris_predicted_vm\")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"border-style: dashed; border-width: 1px;\">\n",
"<div style=\"color: #888;\"># issued at 2015-11-09T23:12:53Z</div>URL: <a href=\"https://console.treasuredata.com/jobs/38954892\" target=\"_blank\">https://console.treasuredata.com/jobs/38954892</a><br>\n",
"Result size: 20 bytes<br>\n",
"Download: 20 / 20 bytes (100.00%)<br>\n",
"<div style=\"color: #888;\"># downloaded at 2015-11-09T23:13:58Z</div></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rowid</th>\n",
" <th>label</th>\n",
" <th>probability</th>\n",
" <th>probabilities</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [rowid, label, probability, probabilities]\n",
"Index: []"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
"WITH t1 as (\n",
" SELECT\n",
" rowid,\n",
" rf_ensemble(predicted) as predicted\n",
" FROM (\n",
" SELECT\n",
" t.rowid, \n",
" tree_predict(p.model, t.features, true) as predicted\n",
" FROM\n",
" iris_training t\n",
" CROSS JOIN iris_model p\n",
" ) t1\n",
" group by\n",
" rowid\n",
" )\n",
" INSERT OVERWRITE TABLE iris_predicted_vm\n",
" SELECT\n",
" rowid,\n",
" predicted.label, predicted.probability, predicted.probabilities\n",
" FROM\n",
" t1;"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_rows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>150</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_rows\n",
"0 150"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_presto\n",
" select count(1) as number_rows from iris_training;"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>percentage_actual_over_predicted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" percentage_actual_over_predicted\n",
"0 100"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%td_hive\n",
" WITH t1 as (\n",
" SELECT\n",
" t.rowid,\n",
" t.label as actual,\n",
" p.label as predicted\n",
" FROM\n",
" iris_predicted_vm p\n",
" LEFT OUTER JOIN iris_training t ON (t.rowid = p.rowid)\n",
" )\n",
" SELECT\n",
" (count(1) / 150.0) * 100 as percentage_actual_over_predicted\n",
" FROM\n",
" t1\n",
" WHERE\n",
" actual = predicted;"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment