Last active
May 13, 2021 05:22
-
-
Save genkuroki/40efed65a6d5639ff6aca2419a8d947d to your computer and use it in GitHub Desktop.
convert iris species to numbers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using CSV, DataFrames\n\n# Build the path to iris.csv relative to the installed DataFrames package:\n# pathof gives the package's main source file, so step up from src/ into docs/src/assets.\niris_csv = joinpath(\n    dirname(pathof(DataFrames)),\n    \"..\", \"docs\", \"src\", \"assets\", \"iris.csv\",\n)", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 1, | |
"data": { | |
"text/plain": "\"C:\\\\Users\\\\genkuroki\\\\.julia\\\\packages\\\\DataFrames\\\\nxjiD\\\\src\\\\..\\\\docs\\\\src\\\\assets\\\\iris.csv\"" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Read the CSV file at iris_csv into a DataFrame, then preview its first five rows.\niris = CSV.read(iris_csv, DataFrame)\nfirst(iris, 5)", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m String \u001b[0m\n─────┼───────────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 Iris-setosa\n 2 │ 4.9 3.0 1.4 0.2 Iris-setosa\n 3 │ 4.7 3.2 1.3 0.2 Iris-setosa\n 4 │ 4.6 3.1 1.5 0.2 Iris-setosa\n 5 │ 5.0 3.6 1.4 0.2 Iris-setosa", | |
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>String</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr></tbody></table>", | |
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & String\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & Iris-setosa \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & Iris-setosa \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & Iris-setosa \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & Iris-setosa \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & Iris-setosa \\\\\n\\end{tabular}\n" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Distinct species labels; a label's position in this vector is used as its numeric code.\nvec_species = unique(iris[!, :Species])", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 3, | |
"data": { | |
"text/plain": "3-element Vector{String}:\n \"Iris-setosa\"\n \"Iris-versicolor\"\n \"Iris-virginica\"" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Lookup table from species label to its 1-based position in vec_species.\ndict_species = Dict(label => code for (code, label) in enumerate(vec_species))", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "Dict{String, Int64} with 3 entries:\n \"Iris-virginica\" => 3\n \"Iris-setosa\" => 1\n \"Iris-versicolor\" => 2" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Encode every row's species label as its integer code.\nnum_species = [dict_species[label] for label in iris.Species]\nfirst(num_species, 5)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "5-element Vector{Int64}:\n 1\n 1\n 1\n 1\n 1" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Round-trip check: decoding the integer codes through vec_species must give back the original column.\nvec_species[num_species] == iris.Species", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "true" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Variant 1: keep the string Species column and append the integer codes as NumSpecies.\ndf = copy(iris)\n# Assigning with df.col = v stores v itself, not a copy. Use copy() so this column\n# does not share memory with num_species (or with any other frame built from it).\ndf.NumSpecies = copy(num_species)\nfirst(df, 5)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "\u001b[1m5×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\u001b[1m NumSpeci\u001b[0m ⋯\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 Iris-setosa ⋯\n 2 │ 4.9 3.0 1.4 0.2 Iris-setosa\n 3 │ 4.7 3.2 1.3 0.2 Iris-setosa\n 4 │ 4.6 3.1 1.5 0.2 Iris-setosa\n 5 │ 5.0 3.6 1.4 0.2 Iris-setosa ⋯\n\u001b[36m 1 column omitted\u001b[0m", | |
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th><th>NumSpecies</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>5 rows × 6 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr></tbody></table>", | |
"text/latex": "\\begin{tabular}{r|cccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species & NumSpecies\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & String & Int64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & Iris-setosa & 1 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & Iris-setosa & 1 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\\end{tabular}\n" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Variant 2: replace the string Species column with the integer codes.\ndf2 = copy(iris)\n# Assigning with df.col = v stores v itself, not a copy. Use copy() so this column\n# does not share memory with num_species (or with any other frame built from it).\ndf2.Species = copy(num_species)\nfirst(df2, 5)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 1\n 2 │ 4.9 3.0 1.4 0.2 1\n 3 │ 4.7 3.2 1.3 0.2 1\n 4 │ 4.6 3.1 1.5 0.2 1\n 5 │ 5.0 3.6 1.4 0.2 1", | |
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>1</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>1</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>1</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>1</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>1</td></tr></tbody></table>", | |
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & Int64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & 1 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & 1 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & 1 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & 1 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & 1 \\\\\n\\end{tabular}\n" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Variant 3: replace the string Species column with the codes converted to Float64.\ndf3 = copy(iris)\ndf3.Species = Float64.(num_species)\nfirst(df3, 5)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n─────┼───────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 1.0\n 2 │ 4.9 3.0 1.4 0.2 1.0\n 3 │ 4.7 3.2 1.3 0.2 1.0\n 4 │ 4.6 3.1 1.5 0.2 1.0\n 5 │ 5.0 3.6 1.4 0.2 1.0", | |
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>1.0</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>1.0</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>1.0</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>1.0</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>1.0</td></tr></tbody></table>", | |
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & Float64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & 1.0 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & 1.0 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & 1.0 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & 1.0 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & 1.0 \\\\\n\\end{tabular}\n" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "julia-1.7-depwarn-o3", | |
"display_name": "Julia 1.7.0-DEV depwarn -O3", | |
"language": "julia" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.7.0" | |
}, | |
"@webio": { | |
"lastKernelId": null, | |
"lastCommId": null | |
}, | |
"gist": { | |
"id": "40efed65a6d5639ff6aca2419a8d947d", | |
"data": { | |
"description": "convert iris species to numbers", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/40efed65a6d5639ff6aca2419a8d947d" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment