Last active
February 16, 2018 16:25
-
-
Save rcdilorenzo/6af6b1f1c9460ae264bd754511c6ff86 to your computer and use it in GitHub Desktop.
(GIF: https://git.io/vAC88, Data: http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data) Label columns as factors using a bit of Vim/Spacemacs foo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Name the 23 columns of the UCI mushroom data set (agaricus-lepiota.data).
#
# Assumes an existing data frame called `raw` that is directly imported from
# the URL and has not converted the "?" values to NA's.
#
# Fail early on an unexpected shape: with more than 23 columns `colnames<-`
# would silently leave the surplus columns with NA names.
if (!is.data.frame(raw) || ncol(raw) != 23L) {
  stop(
    "`raw` must be a data frame with the 23 columns of agaricus-lepiota.data",
    call. = FALSE
  )
}
colnames(raw) <- c(
  "Type", "CapShape", "CapSurface", "CapColor", "Bruises", "Odor",
  "GillAttachment", "GillSpacing", "GillSize", "GillColor",
  "StalkShape", "StalkRoot",
  "StalkSurfaceAboveRing", "StalkSurfaceBelowRing",
  "StalkColorAboveRing", "StalkColorBelowRing",
  "VeilType", "VeilColor", "RingNumber", "RingType",
  "SporePrintColor", "Population", "Habitat"
)
# Class label: replace the one-letter codes with readable level names.
# `levels` lists the raw codes; `labels` gives the name for each code, in the
# same order.
raw$Type <- factor(
  raw$Type,
  levels = c("e", "p"),
  labels = c("edible", "poisonous")
)
# Cap attributes: recode each one-letter code to a descriptive factor level.
# In every call the i-th entry of `labels` names the i-th code in `levels`.
raw$CapShape <- factor(
  raw$CapShape,
  levels = c("b", "c", "x", "f", "k", "s"),
  labels = c("bell", "conical", "convex", "flat", "knobbed", "sunken")
)

raw$CapSurface <- factor(
  raw$CapSurface,
  levels = c("f", "g", "y", "s"),
  labels = c("fibrous", "grooves", "scaly", "smooth")
)

raw$CapColor <- factor(
  raw$CapColor,
  levels = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  labels = c(
    "brown", "buff", "cinnamon", "gray", "green",
    "pink", "purple", "red", "white", "yellow"
  )
)
# Bruising and odor: recode the one-letter codes to descriptive factor levels.
# In every call the i-th entry of `labels` names the i-th code in `levels`.
raw$Bruises <- factor(
  raw$Bruises,
  levels = c("t", "f"),
  labels = c("bruises", "no")
)

raw$Odor <- factor(
  raw$Odor,
  levels = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  labels = c(
    "almond", "anise", "creosote", "fishy", "foul",
    "musty", "none", "pungent", "spicy"
  )
)
# Gill attributes: recode each one-letter code to a descriptive factor level.
# In every call the i-th entry of `labels` names the i-th code in `levels`.
raw$GillAttachment <- factor(
  raw$GillAttachment,
  levels = c("a", "d", "f", "n"),
  labels = c("attached", "descending", "free", "notched")
)

raw$GillSpacing <- factor(
  raw$GillSpacing,
  levels = c("c", "w", "d"),
  labels = c("close", "crowded", "distant")
)

raw$GillSize <- factor(
  raw$GillSize,
  levels = c("b", "n"),
  labels = c("broad", "narrow")
)

raw$GillColor <- factor(
  raw$GillColor,
  levels = c("k", "n", "b", "h", "g", "r", "o", "p", "u", "e", "w", "y"),
  labels = c(
    "black", "brown", "buff", "chocolate", "gray", "green",
    "orange", "pink", "purple", "red", "white", "yellow"
  )
)
# Stalk attributes: recode each one-letter code to a descriptive factor level.
# In every call the i-th entry of `labels` names the i-th code in `levels`.
raw$StalkShape <- factor(
  raw$StalkShape,
  levels = c("e", "t"),
  labels = c("enlarging", "tapering")
)

# "?" is kept as an explicit "missing" level rather than being turned into NA.
raw$StalkRoot <- factor(
  raw$StalkRoot,
  levels = c("b", "c", "u", "e", "z", "r", "?"),
  labels = c(
    "bulbous", "club", "cup", "equal", "rhizomorphs", "rooted", "missing"
  )
)

# The above-ring and below-ring columns share one coding, so each mapping is
# written once and applied to both columns.
raw[c("StalkSurfaceAboveRing", "StalkSurfaceBelowRing")] <- lapply(
  raw[c("StalkSurfaceAboveRing", "StalkSurfaceBelowRing")],
  factor,
  levels = c("f", "y", "k", "s"),
  labels = c("fibrous", "scaly", "silky", "smooth")
)

raw[c("StalkColorAboveRing", "StalkColorBelowRing")] <- lapply(
  raw[c("StalkColorAboveRing", "StalkColorBelowRing")],
  factor,
  levels = c("n", "b", "c", "g", "o", "p", "e", "w", "y"),
  labels = c(
    "brown", "buff", "cinnamon", "gray", "orange",
    "pink", "red", "white", "yellow"
  )
)
# Veil and ring attributes: recode each one-letter code to a descriptive
# factor level. In every call the i-th entry of `labels` names the i-th code
# in `levels`.
raw$VeilType <- factor(
  raw$VeilType,
  levels = c("p", "u"),
  labels = c("partial", "universal")
)

raw$VeilColor <- factor(
  raw$VeilColor,
  levels = c("n", "o", "w", "y"),
  labels = c("brown", "orange", "white", "yellow")
)

raw$RingNumber <- factor(
  raw$RingNumber,
  levels = c("n", "o", "t"),
  labels = c("none", "one", "two")
)

raw$RingType <- factor(
  raw$RingType,
  levels = c("c", "e", "f", "l", "n", "p", "s", "z"),
  labels = c(
    "cobwebby", "evanescent", "flaring", "large",
    "none", "pendant", "sheathing", "zone"
  )
)
# Spore print, population and habitat: recode each one-letter code to a
# descriptive factor level. In every call the i-th entry of `labels` names the
# i-th code in `levels`.
raw$SporePrintColor <- factor(
  raw$SporePrintColor,
  levels = c("k", "n", "b", "h", "r", "o", "u", "w", "y"),
  labels = c(
    "black", "brown", "buff", "chocolate", "green",
    "orange", "purple", "white", "yellow"
  )
)

raw$Population <- factor(
  raw$Population,
  levels = c("a", "c", "n", "s", "v", "y"),
  labels = c(
    "abundant", "clustered", "numerous", "scattered", "several", "solitary"
  )
)

raw$Habitat <- factor(
  raw$Habitat,
  levels = c("g", "l", "m", "p", "u", "w", "d"),
  labels = c(
    "grasses", "leaves", "meadows", "paths", "urban", "waste", "woods"
  )
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment