Skip to content

Instantly share code, notes, and snippets.

@genkuroki
Last active May 13, 2021 05:22
Show Gist options
  • Save genkuroki/40efed65a6d5639ff6aca2419a8d947d to your computer and use it in GitHub Desktop.
Save genkuroki/40efed65a6d5639ff6aca2419a8d947d to your computer and use it in GitHub Desktop.
convert iris species to numbers
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "using DataFrames\nusing CSV\n\n# iris.csv sits under docs/src/assets of the installed DataFrames package;\n# pathof(DataFrames) points into its src directory, hence the \"..\" hop.\niris_csv = joinpath(pathof(DataFrames) |> dirname, \"..\", \"docs\", \"src\", \"assets\", \"iris.csv\")",
"execution_count": 1,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 1,
"data": {
"text/plain": "\"C:\\\\Users\\\\genkuroki\\\\.julia\\\\packages\\\\DataFrames\\\\nxjiD\\\\src\\\\..\\\\docs\\\\src\\\\assets\\\\iris.csv\""
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Read the CSV file at iris_csv into a DataFrame, then show only its first five rows.\niris = CSV.read(iris_csv, DataFrame)\nfirst(iris, 5)",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m String \u001b[0m\n─────┼───────────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 Iris-setosa\n 2 │ 4.9 3.0 1.4 0.2 Iris-setosa\n 3 │ 4.7 3.2 1.3 0.2 Iris-setosa\n 4 │ 4.6 3.1 1.5 0.2 Iris-setosa\n 5 │ 5.0 3.6 1.4 0.2 Iris-setosa",
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>String</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>Iris-setosa</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td></tr></tbody></table>",
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & String\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & Iris-setosa \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & Iris-setosa \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & Iris-setosa \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & Iris-setosa \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & Iris-setosa \\\\\n\\end{tabular}\n"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Distinct species labels, kept in order of first appearance in the column.\nvec_species = unique(iris[!, :Species])",
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 3,
"data": {
"text/plain": "3-element Vector{String}:\n \"Iris-setosa\"\n \"Iris-versicolor\"\n \"Iris-virginica\""
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Invert vec_species: every label maps to its 1-based position in that vector.\ndict_species = Dict(zip(vec_species, eachindex(vec_species)))",
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": "Dict{String, Int64} with 3 entries:\n \"Iris-virginica\" => 3\n \"Iris-setosa\" => 1\n \"Iris-versicolor\" => 2"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Look up the integer code of every row's species label.\nnum_species = [dict_species[label] for label in iris.Species]\nfirst(num_species, 5)",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "5-element Vector{Int64}:\n 1\n 1\n 1\n 1\n 1"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Round-trip check: indexing vec_species by the codes must give back the original labels.\nvec_species[num_species] == iris.Species",
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 6,
"data": {
"text/plain": "true"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Keep the original columns and append the integer codes as a new column.\ndf = copy(iris)\n# df.col = v stores the vector v itself (no copy), so copy it first; otherwise\n# this column would share memory with the global num_species vector.\ndf.NumSpecies = copy(num_species)\nfirst(df, 5)",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "\u001b[1m5×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\u001b[1m NumSpeci\u001b[0m ⋯\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 Iris-setosa ⋯\n 2 │ 4.9 3.0 1.4 0.2 Iris-setosa\n 3 │ 4.7 3.2 1.3 0.2 Iris-setosa\n 4 │ 4.6 3.1 1.5 0.2 Iris-setosa\n 5 │ 5.0 3.6 1.4 0.2 Iris-setosa ⋯\n\u001b[36m 1 column omitted\u001b[0m",
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th><th>NumSpecies</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>5 rows × 6 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>Iris-setosa</td><td>1</td></tr></tbody></table>",
"text/latex": "\\begin{tabular}{r|cccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species & NumSpecies\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & String & Int64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & Iris-setosa & 1 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & Iris-setosa & 1 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & Iris-setosa & 1 \\\\\n\\end{tabular}\n"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Replace the String species labels with their integer codes.\ndf2 = copy(iris)\n# df.col = v stores the vector v itself (no copy); copy() keeps this column\n# independent of the global num_species vector.\ndf2.Species = copy(num_species)\nfirst(df2, 5)",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 1\n 2 │ 4.9 3.0 1.4 0.2 1\n 3 │ 4.7 3.2 1.3 0.2 1\n 4 │ 4.6 3.1 1.5 0.2 1\n 5 │ 5.0 3.6 1.4 0.2 1",
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>1</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>1</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>1</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>1</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>1</td></tr></tbody></table>",
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & Int64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & 1 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & 1 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & 1 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & 1 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & 1 \\\\\n\\end{tabular}\n"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Same replacement as an all-Float64 table: the codes are converted to Float64.\ndf3 = copy(iris)\ndf3.Species = Float64.(num_species)\nfirst(df3, 5)",
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": "\u001b[1m5×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m SepalLength \u001b[0m\u001b[1m SepalWidth \u001b[0m\u001b[1m PetalLength \u001b[0m\u001b[1m PetalWidth \u001b[0m\u001b[1m Species \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n─────┼───────────────────────────────────────────────────────────\n 1 │ 5.1 3.5 1.4 0.2 1.0\n 2 │ 4.9 3.0 1.4 0.2 1.0\n 3 │ 4.7 3.2 1.3 0.2 1.0\n 4 │ 4.6 3.1 1.5 0.2 1.0\n 5 │ 5.0 3.6 1.4 0.2 1.0",
"text/html": "<table class=\"data-frame\"><thead><tr><th></th><th>SepalLength</th><th>SepalWidth</th><th>PetalLength</th><th>PetalWidth</th><th>Species</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>5 rows × 5 columns</p><tr><th>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>1.0</td></tr><tr><th>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>1.0</td></tr><tr><th>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>1.0</td></tr><tr><th>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>1.0</td></tr><tr><th>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>1.0</td></tr></tbody></table>",
"text/latex": "\\begin{tabular}{r|ccccc}\n\t& SepalLength & SepalWidth & PetalLength & PetalWidth & Species\\\\\n\t\\hline\n\t& Float64 & Float64 & Float64 & Float64 & Float64\\\\\n\t\\hline\n\t1 & 5.1 & 3.5 & 1.4 & 0.2 & 1.0 \\\\\n\t2 & 4.9 & 3.0 & 1.4 & 0.2 & 1.0 \\\\\n\t3 & 4.7 & 3.2 & 1.3 & 0.2 & 1.0 \\\\\n\t4 & 4.6 & 3.1 & 1.5 & 0.2 & 1.0 \\\\\n\t5 & 5.0 & 3.6 & 1.4 & 0.2 & 1.0 \\\\\n\\end{tabular}\n"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "julia-1.7-depwarn-o3",
"display_name": "Julia 1.7.0-DEV depwarn -O3",
"language": "julia"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"language_info": {
"file_extension": ".jl",
"name": "julia",
"mimetype": "application/julia",
"version": "1.7.0"
},
"@webio": {
"lastKernelId": null,
"lastCommId": null
},
"gist": {
"id": "40efed65a6d5639ff6aca2419a8d947d",
"data": {
"description": "convert iris species to numbers",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/40efed65a6d5639ff6aca2419a8d947d"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment