Last active
November 25, 2020 13:58
-
-
Save gumdropsteve/51d39e477422d8dd521fc2131984755b to your computer and use it in GitHub Desktop.
Run these notebooks on app.blazingsql.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# SQL Joins with BlazingSQL\n", | |
"- JOIN (INNER JOIN)\n", | |
"- LEFT JOIN\n", | |
"- RIGHT JOIN\n", | |
"- FULL JOIN (FULL OUTER JOIN)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BlazingContext ready\n" | |
] | |
} | |
], | |
"source": [ | |
"# pandas on GPU\n", | |
"import cudf\n", | |
"\n", | |
"from blazingsql import BlazingContext\n", | |
"# start up BlazingSQL (BlazingContext instance)\n", | |
"bc = BlazingContext()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Zillow Prize Dataset ([Zestimate](https://www.kaggle.com/c/zillow-prize-1/data))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### 2016 Training Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>parcelid</th>\n", | |
" <th>logerror</th>\n", | |
" <th>transactiondate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>90273</th>\n", | |
" <td>11402105</td>\n", | |
" <td>0.0602</td>\n", | |
" <td>2016-12-30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>90274</th>\n", | |
" <td>12566293</td>\n", | |
" <td>0.4207</td>\n", | |
" <td>2016-12-30</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" parcelid logerror transactiondate\n", | |
"90273 11402105 0.0602 2016-12-30\n", | |
"90274 12566293 0.4207 2016-12-30" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = cudf.read_csv('https://github.com/gumdropsteve/datasets/blob/master/zillow/train_2016_v2.csv?raw=true')\n", | |
"\n", | |
"bc.create_table('train_2016', df)\n", | |
"\n", | |
"df.tail(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### 2016 Property Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>parcelid</th>\n", | |
" <th>airconditioningtypeid</th>\n", | |
" <th>architecturalstyletypeid</th>\n", | |
" <th>basementsqft</th>\n", | |
" <th>bathroomcnt</th>\n", | |
" <th>bedroomcnt</th>\n", | |
" <th>buildingclasstypeid</th>\n", | |
" <th>buildingqualitytypeid</th>\n", | |
" <th>calculatedbathnbr</th>\n", | |
" <th>decktypeid</th>\n", | |
" <th>...</th>\n", | |
" <th>numberofstories</th>\n", | |
" <th>fireplaceflag</th>\n", | |
" <th>structuretaxvaluedollarcnt</th>\n", | |
" <th>taxvaluedollarcnt</th>\n", | |
" <th>assessmentyear</th>\n", | |
" <th>landtaxvaluedollarcnt</th>\n", | |
" <th>taxamount</th>\n", | |
" <th>taxdelinquencyflag</th>\n", | |
" <th>taxdelinquencyyear</th>\n", | |
" <th>censustractandblock</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>99505</th>\n", | |
" <td>13841611</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>2.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>2.0</td>\n", | |
" <td><NA></td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td><NA></td>\n", | |
" <td>109759.0</td>\n", | |
" <td>332481.0</td>\n", | |
" <td>2015</td>\n", | |
" <td>222722.0</td>\n", | |
" <td>4034.84</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>6.059075e+13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99506</th>\n", | |
" <td>13842040</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>1.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>1.0</td>\n", | |
" <td><NA></td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td><NA></td>\n", | |
" <td>47101.0</td>\n", | |
" <td>233509.0</td>\n", | |
" <td>2015</td>\n", | |
" <td>186408.0</td>\n", | |
" <td>2912.74</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>6.059075e+13</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2 rows × 58 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" parcelid airconditioningtypeid architecturalstyletypeid basementsqft \\\n", | |
"99505 13841611 <NA> <NA> <NA> \n", | |
"99506 13842040 <NA> <NA> <NA> \n", | |
"\n", | |
" bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid \\\n", | |
"99505 2.0 4.0 <NA> <NA> \n", | |
"99506 1.0 3.0 <NA> <NA> \n", | |
"\n", | |
" calculatedbathnbr decktypeid ... numberofstories fireplaceflag \\\n", | |
"99505 2.0 <NA> ... 1.0 <NA> \n", | |
"99506 1.0 <NA> ... 1.0 <NA> \n", | |
"\n", | |
" structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear \\\n", | |
"99505 109759.0 332481.0 2015 \n", | |
"99506 47101.0 233509.0 2015 \n", | |
"\n", | |
" landtaxvaluedollarcnt taxamount taxdelinquencyflag \\\n", | |
"99505 222722.0 4034.84 <NA> \n", | |
"99506 186408.0 2912.74 <NA> \n", | |
"\n", | |
" taxdelinquencyyear censustractandblock \n", | |
"99505 <NA> 6.059075e+13 \n", | |
"99506 <NA> 6.059075e+13 \n", | |
"\n", | |
"[2 rows x 58 columns]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = cudf.read_csv('https://github.com/gumdropsteve/datasets/blob/master/zillow/properties_2016_part_0.csv?raw=true')\n", | |
"\n", | |
"bc.create_table('properties_2016', df)\n", | |
"\n", | |
"df.tail(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>transactiondate</th>\n", | |
" <th>parcelid</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2016-08-05</td>\n", | |
" <td>13040966</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2016-07-29</td>\n", | |
" <td>12473295</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2016-04-28</td>\n", | |
" <td>12473624</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2016-06-20</td>\n", | |
" <td>10981317</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2016-08-26</td>\n", | |
" <td>11755612</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2949</th>\n", | |
" <td>2016-06-30</td>\n", | |
" <td>12965875</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2950</th>\n", | |
" <td>2016-08-23</td>\n", | |
" <td>11047223</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2951</th>\n", | |
" <td>2016-03-15</td>\n", | |
" <td>12154867</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2952</th>\n", | |
" <td>2016-09-01</td>\n", | |
" <td>12156270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2953</th>\n", | |
" <td>2016-05-27</td>\n", | |
" <td>12158836</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2954 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" transactiondate parcelid\n", | |
"0 2016-08-05 13040966\n", | |
"1 2016-07-29 12473295\n", | |
"2 2016-04-28 12473624\n", | |
"3 2016-06-20 10981317\n", | |
"4 2016-08-26 11755612\n", | |
"... ... ...\n", | |
"2949 2016-06-30 12965875\n", | |
"2950 2016-08-23 11047223\n", | |
"2951 2016-03-15 12154867\n", | |
"2952 2016-09-01 12156270\n", | |
"2953 2016-05-27 12158836\n", | |
"\n", | |
"[2954 rows x 2 columns]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = '''\n", | |
" SELECT\n", | |
" \n", | |
" t.transactiondate,\n", | |
" p.parcelid\n", | |
" \n", | |
" FROM\n", | |
" \n", | |
" properties_2016 AS p\n", | |
" \n", | |
" JOIN\n", | |
" \n", | |
" train_2016 AS t\n", | |
" \n", | |
" ON t.parcelid = p.parcelid\n", | |
" '''\n", | |
"bc.sql(query)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"query = '''\n", | |
" SELECT\n", | |
" \n", | |
" t.transactiondate,\n", | |
" p.parcelid\n", | |
" \n", | |
" FROM\n", | |
" \n", | |
" properties_2016 AS p\n", | |
" \n", | |
" RIGHT JOIN\n", | |
" \n", | |
" train_2016 AS t\n", | |
" \n", | |
" ON t.parcelid = p.parcelid\n", | |
" '''\n", | |
"bc.sql(query)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>transactiondate</th>\n", | |
" <th>parcelid</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2016-01-01</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2016-01-01</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2016-01-01</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2016-01-02</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2016-01-02</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>186829</th>\n", | |
" <td><NA></td>\n", | |
" <td>12157397</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>186830</th>\n", | |
" <td><NA></td>\n", | |
" <td>12157970</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>186831</th>\n", | |
" <td><NA></td>\n", | |
" <td>12158139</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>186832</th>\n", | |
" <td><NA></td>\n", | |
" <td>12158323</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>186833</th>\n", | |
" <td><NA></td>\n", | |
" <td>12158790</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>186834 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" transactiondate parcelid\n", | |
"0 2016-01-01 <NA>\n", | |
"1 2016-01-01 <NA>\n", | |
"2 2016-01-01 <NA>\n", | |
"3 2016-01-02 <NA>\n", | |
"4 2016-01-02 <NA>\n", | |
"... ... ...\n", | |
"186829 <NA> 12157397\n", | |
"186830 <NA> 12157970\n", | |
"186831 <NA> 12158139\n", | |
"186832 <NA> 12158323\n", | |
"186833 <NA> 12158790\n", | |
"\n", | |
"[186834 rows x 2 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = '''\n", | |
" SELECT\n", | |
" \n", | |
" t.transactiondate,\n", | |
" p.parcelid\n", | |
" \n", | |
" FROM\n", | |
" \n", | |
" properties_2016 AS p\n", | |
" \n", | |
" FULL OUTER JOIN\n", | |
" \n", | |
" train_2016 AS t\n", | |
" \n", | |
" ON t.parcelid = p.parcelid\n", | |
" '''\n", | |
"bc.sql(query)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cancer Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# this cell downloads the data\n", | |
"!wget 'https://blazingsql-colab.s3.amazonaws.com/cancer_data/cancer_data_00.csv'\n", | |
"!wget 'https://blazingsql-colab.s3.amazonaws.com/cancer_data/cancer_data_01.parquet'\n", | |
"!wget 'https://blazingsql-colab.s3.amazonaws.com/cancer_data/cancer_data_02.csv'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"column_names = ['diagnosis_result', 'radius', 'texture', 'perimeter']\n", | |
"bc.create_table('data_00', 'cancer_data_00.csv', names=column_names)\n", | |
"\n", | |
"column_names = ['perimeter', 'area', 'smoothness', 'compactness']\n", | |
"bc.create_table('data_01', 'cancer_data_01.parquet', names=column_names)\n", | |
"\n", | |
"column_names = ['compactness', 'symmetry', 'fractal_dimension']\n", | |
"bc.create_table('data_02', 'cancer_data_02.csv', names=column_names)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis_result</th>\n", | |
" <th>radius</th>\n", | |
" <th>texture</th>\n", | |
" <th>perimeter</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>23</td>\n", | |
" <td>12</td>\n", | |
" <td>151</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>133</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis_result radius texture perimeter\n", | |
"0 1 23 12 151\n", | |
"1 0 9 13 133" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bc.sql('SELECT * FROM data_00 LIMIT 2')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>perimeter</th>\n", | |
" <th>area</th>\n", | |
" <th>smoothness</th>\n", | |
" <th>compactness</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>151.0</td>\n", | |
" <td>954.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.278</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>133.0</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.079</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" perimeter area smoothness compactness\n", | |
"0 151.0 954.0 0.143 0.278\n", | |
"1 133.0 1326.0 0.143 0.079" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bc.sql('SELECT * FROM data_01 LIMIT 2')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>compactness</th>\n", | |
" <th>symmetry</th>\n", | |
" <th>fractal_dimension</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.278</td>\n", | |
" <td>0.242</td>\n", | |
" <td>0.079</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.079</td>\n", | |
" <td>0.181</td>\n", | |
" <td>0.057</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" compactness symmetry fractal_dimension\n", | |
"0 0.278 0.242 0.079\n", | |
"1 0.079 0.181 0.057" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bc.sql('SELECT * FROM data_02 LIMIT 2')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### JOIN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis_result</th>\n", | |
" <th>radius</th>\n", | |
" <th>texture</th>\n", | |
" <th>perimeter</th>\n", | |
" <th>area</th>\n", | |
" <th>smoothness</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>22</td>\n", | |
" <td>14</td>\n", | |
" <td>78</td>\n", | |
" <td>386.0</td>\n", | |
" <td>0.070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>19</td>\n", | |
" <td>27</td>\n", | |
" <td>62</td>\n", | |
" <td>295.0</td>\n", | |
" <td>0.102</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>21</td>\n", | |
" <td>24</td>\n", | |
" <td>74</td>\n", | |
" <td>402.0</td>\n", | |
" <td>0.110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>27</td>\n", | |
" <td>94</td>\n", | |
" <td>578.0</td>\n", | |
" <td>0.113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>27</td>\n", | |
" <td>94</td>\n", | |
" <td>633.0</td>\n", | |
" <td>0.098</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>247</th>\n", | |
" <td>0</td>\n", | |
" <td>12</td>\n", | |
" <td>23</td>\n", | |
" <td>96</td>\n", | |
" <td>652.0</td>\n", | |
" <td>0.113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>248</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>17</td>\n", | |
" <td>87</td>\n", | |
" <td>555.0</td>\n", | |
" <td>0.102</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>249</th>\n", | |
" <td>0</td>\n", | |
" <td>18</td>\n", | |
" <td>12</td>\n", | |
" <td>72</td>\n", | |
" <td>394.0</td>\n", | |
" <td>0.081</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>250</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>18</td>\n", | |
" <td>74</td>\n", | |
" <td>413.0</td>\n", | |
" <td>0.090</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>251</th>\n", | |
" <td>1</td>\n", | |
" <td>11</td>\n", | |
" <td>21</td>\n", | |
" <td>97</td>\n", | |
" <td>668.0</td>\n", | |
" <td>0.117</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>252 rows × 6 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis_result radius texture perimeter area smoothness\n", | |
"0 0 22 14 78 386.0 0.070\n", | |
"1 0 19 27 62 295.0 0.102\n", | |
"2 0 21 24 74 402.0 0.110\n", | |
"3 1 16 27 94 578.0 0.113\n", | |
"4 1 16 27 94 633.0 0.098\n", | |
".. ... ... ... ... ... ...\n", | |
"247 0 12 23 96 652.0 0.113\n", | |
"248 0 10 17 87 555.0 0.102\n", | |
"249 0 18 12 72 394.0 0.081\n", | |
"250 0 10 18 74 413.0 0.090\n", | |
"251 1 11 21 97 668.0 0.117\n", | |
"\n", | |
"[252 rows x 6 columns]" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sql = '''\n", | |
" SELECT \n", | |
" a.*, \n", | |
" b.area, b.smoothness\n", | |
" FROM \n", | |
" data_00 AS a\n", | |
" LEFT JOIN data_01 AS b\n", | |
" ON (a.perimeter = b.perimeter)\n", | |
" '''\n", | |
"bc.sql(sql)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis_result</th>\n", | |
" <th>radius</th>\n", | |
" <th>texture</th>\n", | |
" <th>perimeter</th>\n", | |
" <th>area</th>\n", | |
" <th>smoothness</th>\n", | |
" <th>compactness</th>\n", | |
" <th>symmetry</th>\n", | |
" <th>fractal_dimension</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>19</td>\n", | |
" <td>27</td>\n", | |
" <td>72</td>\n", | |
" <td>394.0</td>\n", | |
" <td>0.081</td>\n", | |
" <td>0.047</td>\n", | |
" <td>0.152</td>\n", | |
" <td>0.057</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>78</td>\n", | |
" <td>449.0</td>\n", | |
" <td>0.103</td>\n", | |
" <td>0.091</td>\n", | |
" <td>0.168</td>\n", | |
" <td>0.060</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>18</td>\n", | |
" <td>25</td>\n", | |
" <td>97</td>\n", | |
" <td>668.0</td>\n", | |
" <td>0.117</td>\n", | |
" <td>0.148</td>\n", | |
" <td>0.195</td>\n", | |
" <td>0.067</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>23</td>\n", | |
" <td>12</td>\n", | |
" <td>151</td>\n", | |
" <td>954.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.278</td>\n", | |
" <td>0.242</td>\n", | |
" <td>0.079</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>133</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.079</td>\n", | |
" <td>0.181</td>\n", | |
" <td>0.057</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>332</th>\n", | |
" <td>0</td>\n", | |
" <td>14</td>\n", | |
" <td>14</td>\n", | |
" <td>85</td>\n", | |
" <td>552.0</td>\n", | |
" <td>0.074</td>\n", | |
" <td>0.051</td>\n", | |
" <td>0.139</td>\n", | |
" <td>0.053</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>333</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>19</td>\n", | |
" <td>83</td>\n", | |
" <td>499.0</td>\n", | |
" <td>0.112</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.191</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>334</th>\n", | |
" <td>0</td>\n", | |
" <td>22</td>\n", | |
" <td>16</td>\n", | |
" <td>83</td>\n", | |
" <td>499.0</td>\n", | |
" <td>0.112</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.191</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>335</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>17</td>\n", | |
" <td>88</td>\n", | |
" <td>559.0</td>\n", | |
" <td>0.129</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.240</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>336</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>17</td>\n", | |
" <td>88</td>\n", | |
" <td>559.0</td>\n", | |
" <td>0.129</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.213</td>\n", | |
" <td>0.060</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>337 rows × 9 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis_result radius texture perimeter area smoothness \\\n", | |
"0 1 19 27 72 394.0 0.081 \n", | |
"1 0 20 21 78 449.0 0.103 \n", | |
"2 1 18 25 97 668.0 0.117 \n", | |
"3 1 23 12 151 954.0 0.143 \n", | |
"4 0 9 13 133 1326.0 0.143 \n", | |
".. ... ... ... ... ... ... \n", | |
"332 0 14 14 85 552.0 0.074 \n", | |
"333 1 16 19 83 499.0 0.112 \n", | |
"334 0 22 16 83 499.0 0.112 \n", | |
"335 0 10 17 88 559.0 0.129 \n", | |
"336 0 10 17 88 559.0 0.129 \n", | |
"\n", | |
" compactness symmetry fractal_dimension \n", | |
"0 0.047 0.152 0.057 \n", | |
"1 0.091 0.168 0.060 \n", | |
"2 0.148 0.195 0.067 \n", | |
"3 0.278 0.242 0.079 \n", | |
"4 0.079 0.181 0.057 \n", | |
".. ... ... ... \n", | |
"332 0.051 0.139 0.053 \n", | |
"333 0.126 0.191 0.066 \n", | |
"334 0.126 0.191 0.066 \n", | |
"335 0.105 0.240 0.066 \n", | |
"336 0.105 0.213 0.060 \n", | |
"\n", | |
"[337 rows x 9 columns]" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sql = '''\n", | |
" SELECT \n", | |
" a.*, \n", | |
" b.area, b.smoothness, \n", | |
" c.* \n", | |
" FROM \n", | |
" data_00 AS a\n", | |
" JOIN data_01 AS b\n", | |
" ON (a.perimeter = b.perimeter)\n", | |
" JOIN data_02 as c\n", | |
" ON (b.compactness = c.compactness)\n", | |
" '''\n", | |
"bc.sql(sql)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### LEFT JOIN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sql = '''\n", | |
" SELECT \n", | |
" a.*, \n", | |
" b.area, b.smoothness, \n", | |
" c.* \n", | |
" FROM data_00 AS a\n", | |
" LEFT JOIN data_01 AS b\n", | |
" ON (a.perimeter = b.perimeter)\n", | |
" LEFT JOIN data_02 as c\n", | |
" ON (b.compactness = c.compactness)\n", | |
" '''\n", | |
"bc.sql(sql)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Stock Data ([NVDA, TSLA](https://github.com/gumdropsteve/datasets/blob/master/stocks))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = cudf.read_csv('https://raw.githubusercontent.com/gumdropsteve/datasets/master/stocks/NVDA.csv')\n", | |
"bc.create_table('nvda', df)\n", | |
"\n", | |
"df = cudf.read_csv('https://raw.githubusercontent.com/gumdropsteve/datasets/master/stocks/TSLA.csv')\n", | |
"bc.create_table('tsla', df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ds</th>\n", | |
" <th>nvda_open</th>\n", | |
" <th>tsla_open</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2016-11-08</td>\n", | |
" <td>71.540001</td>\n", | |
" <td>38.757999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2016-11-09</td>\n", | |
" <td>69.230003</td>\n", | |
" <td>37.375999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2016-11-10</td>\n", | |
" <td>71.489998</td>\n", | |
" <td>38.209999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2016-11-11</td>\n", | |
" <td>79.510002</td>\n", | |
" <td>36.848000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2016-11-14</td>\n", | |
" <td>88.089996</td>\n", | |
" <td>37.599998</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2512</th>\n", | |
" <td>2018-03-28</td>\n", | |
" <td>224.130005</td>\n", | |
" <td>52.916000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2513</th>\n", | |
" <td>2018-03-29</td>\n", | |
" <td>224.110001</td>\n", | |
" <td>51.298000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2514</th>\n", | |
" <td>2018-04-02</td>\n", | |
" <td>228.740005</td>\n", | |
" <td>51.251999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2515</th>\n", | |
" <td>2018-04-03</td>\n", | |
" <td>227.800003</td>\n", | |
" <td>53.964001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2516</th>\n", | |
" <td>2018-04-04</td>\n", | |
" <td>215.009995</td>\n", | |
" <td>50.556000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2517 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ds nvda_open tsla_open\n", | |
"0 2016-11-08 71.540001 38.757999\n", | |
"1 2016-11-09 69.230003 37.375999\n", | |
"2 2016-11-10 71.489998 38.209999\n", | |
"3 2016-11-11 79.510002 36.848000\n", | |
"4 2016-11-14 88.089996 37.599998\n", | |
"... ... ... ...\n", | |
"2512 2018-03-28 224.130005 52.916000\n", | |
"2513 2018-03-29 224.110001 51.298000\n", | |
"2514 2018-04-02 228.740005 51.251999\n", | |
"2515 2018-04-03 227.800003 53.964001\n", | |
"2516 2018-04-04 215.009995 50.556000\n", | |
"\n", | |
"[2517 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = '''\n", | |
" SELECT\n", | |
" nvda.ds AS ds,\n", | |
" \n", | |
" nvda.open_price AS nvda_open,\n", | |
" \n", | |
" tsla.opening_price AS tsla_open\n", | |
" \n", | |
" FROM\n", | |
" nvda\n", | |
" JOIN\n", | |
" tsla\n", | |
" \n", | |
" ON nvda.ds = tsla.ds\n", | |
" '''\n", | |
"bc.sql(query)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Double LEFT JOIN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis_result</th>\n", | |
" <th>radius</th>\n", | |
" <th>texture</th>\n", | |
" <th>perimeter</th>\n", | |
" <th>perimeter0</th>\n", | |
" <th>area</th>\n", | |
" <th>smoothness</th>\n", | |
" <th>compactness</th>\n", | |
" <th>compactness0</th>\n", | |
" <th>symmetry</th>\n", | |
" <th>fractal_dimension</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>19</td>\n", | |
" <td>27</td>\n", | |
" <td>72</td>\n", | |
" <td>72.0</td>\n", | |
" <td>394.0</td>\n", | |
" <td>0.081</td>\n", | |
" <td>0.047</td>\n", | |
" <td>0.047</td>\n", | |
" <td>0.152</td>\n", | |
" <td>0.057</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>78</td>\n", | |
" <td>78.0</td>\n", | |
" <td>449.0</td>\n", | |
" <td>0.103</td>\n", | |
" <td>0.091</td>\n", | |
" <td>0.091</td>\n", | |
" <td>0.168</td>\n", | |
" <td>0.060</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>18</td>\n", | |
" <td>25</td>\n", | |
" <td>97</td>\n", | |
" <td>97.0</td>\n", | |
" <td>668.0</td>\n", | |
" <td>0.117</td>\n", | |
" <td>0.148</td>\n", | |
" <td>0.148</td>\n", | |
" <td>0.195</td>\n", | |
" <td>0.067</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>23</td>\n", | |
" <td>12</td>\n", | |
" <td>151</td>\n", | |
" <td>151.0</td>\n", | |
" <td>954.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.278</td>\n", | |
" <td>0.278</td>\n", | |
" <td>0.242</td>\n", | |
" <td>0.079</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>133</td>\n", | |
" <td>133.0</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.143</td>\n", | |
" <td>0.079</td>\n", | |
" <td>0.079</td>\n", | |
" <td>0.181</td>\n", | |
" <td>0.057</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>332</th>\n", | |
" <td>0</td>\n", | |
" <td>14</td>\n", | |
" <td>14</td>\n", | |
" <td>85</td>\n", | |
" <td>85.0</td>\n", | |
" <td>552.0</td>\n", | |
" <td>0.074</td>\n", | |
" <td>0.051</td>\n", | |
" <td>0.051</td>\n", | |
" <td>0.139</td>\n", | |
" <td>0.053</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>333</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>19</td>\n", | |
" <td>83</td>\n", | |
" <td>83.0</td>\n", | |
" <td>499.0</td>\n", | |
" <td>0.112</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.191</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>334</th>\n", | |
" <td>0</td>\n", | |
" <td>22</td>\n", | |
" <td>16</td>\n", | |
" <td>83</td>\n", | |
" <td>83.0</td>\n", | |
" <td>499.0</td>\n", | |
" <td>0.112</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.126</td>\n", | |
" <td>0.191</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>335</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>17</td>\n", | |
" <td>88</td>\n", | |
" <td>88.0</td>\n", | |
" <td>559.0</td>\n", | |
" <td>0.129</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.240</td>\n", | |
" <td>0.066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>336</th>\n", | |
" <td>0</td>\n", | |
" <td>10</td>\n", | |
" <td>17</td>\n", | |
" <td>88</td>\n", | |
" <td>88.0</td>\n", | |
" <td>559.0</td>\n", | |
" <td>0.129</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.105</td>\n", | |
" <td>0.213</td>\n", | |
" <td>0.060</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>337 rows × 11 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis_result radius texture perimeter perimeter0 area \\\n", | |
"0 1 19 27 72 72.0 394.0 \n", | |
"1 0 20 21 78 78.0 449.0 \n", | |
"2 1 18 25 97 97.0 668.0 \n", | |
"3 1 23 12 151 151.0 954.0 \n", | |
"4 0 9 13 133 133.0 1326.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"332 0 14 14 85 85.0 552.0 \n", | |
"333 1 16 19 83 83.0 499.0 \n", | |
"334 0 22 16 83 83.0 499.0 \n", | |
"335 0 10 17 88 88.0 559.0 \n", | |
"336 0 10 17 88 88.0 559.0 \n", | |
"\n", | |
" smoothness compactness compactness0 symmetry fractal_dimension \n", | |
"0 0.081 0.047 0.047 0.152 0.057 \n", | |
"1 0.103 0.091 0.091 0.168 0.060 \n", | |
"2 0.117 0.148 0.148 0.195 0.067 \n", | |
"3 0.143 0.278 0.278 0.242 0.079 \n", | |
"4 0.143 0.079 0.079 0.181 0.057 \n", | |
".. ... ... ... ... ... \n", | |
"332 0.074 0.051 0.051 0.139 0.053 \n", | |
"333 0.112 0.126 0.126 0.191 0.066 \n", | |
"334 0.112 0.126 0.126 0.191 0.066 \n", | |
"335 0.129 0.105 0.105 0.240 0.066 \n", | |
"336 0.129 0.105 0.105 0.213 0.060 \n", | |
"\n", | |
"[337 rows x 11 columns]" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sql = '''\n", | |
" SELECT \n", | |
" a.*, \n", | |
" b.area, b.smoothness, \n", | |
" c.* \n", | |
" FROM data_00 AS a\n", | |
" LEFT JOIN data_01 AS b\n", | |
" ON (a.perimeter = b.perimeter)\n", | |
" LEFT JOIN data_02 as c\n", | |
" ON (b.compactness = c.compactness)\n", | |
" '''\n", | |
"bc.sql(sql)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "RAPIDS Stable", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment