library(tidymodels)
data(ames)

## Fix the RNG seed so the stratified split is reproducible.
set.seed(833961)

## Hold out 20% of the rows for testing, stratified on the outcome.
## The proportion argument of initial_split() is `prop`. The earlier spelling
## `prob = 0.80` matched no parameter and was swallowed by `...`, so the
## default 3/4 split was used instead (the recorded output further down,
## 2,199 training / 731 test rows, reflects that 75% split; row counts will
## differ once the script is re-run with the corrected argument).
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

## Recipe: log10-transform the outcome and dummy-encode the nominal columns.
## `skip = TRUE` on step_log() is the behaviour this example demonstrates:
## the step is applied while the recipe is prepped on the training set, but
## is skipped when bake() is called on a data set.
ames_rec <- recipe(
  Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type,
  data = ames_train
) %>%
  step_log(Sale_Price, skip = TRUE, base = 10) %>%
  step_dummy(all_nominal())
## Log step skipped: bake() on the training data leaves Sale_Price on its
## original scale, because step_log() was declared with skip = TRUE.
ames_rec %>%
  prep() %>%
  bake(new_data = ames_train)
#> # A tibble: 2,199 x 35
#> Gr_Liv_Area Year_Built Sale_Price Neighborhood_Co… Neighborhood_Ol…
#> <int> <int> <int> <dbl> <dbl>
#> 1 1656 1960 215000 0 0
#> 2 896 1961 105000 0 0
#> 3 1329 1958 172000 0 0
#> 4 1604 1998 195500 0 0
#> 5 1338 2001 213500 0 0
#> 6 1280 1992 191500 0 0
#> 7 1616 1995 236500 0 0
#> 8 1804 1999 189000 0 0
#> 9 1655 1993 175900 0 0
#> 10 1187 1992 185000 0 0
#> # … with 2,189 more rows, and 30 more variables: Neighborhood_Edwards <dbl>,
#> # Neighborhood_Somerset <dbl>, Neighborhood_Northridge_Heights <dbl>,
#> # Neighborhood_Gilbert <dbl>, Neighborhood_Sawyer <dbl>,
#> # Neighborhood_Northwest_Ames <dbl>, Neighborhood_Sawyer_West <dbl>,
#> # Neighborhood_Mitchell <dbl>, Neighborhood_Brookside <dbl>,
#> # Neighborhood_Crawford <dbl>, Neighborhood_Iowa_DOT_and_Rail_Road <dbl>,
#> # Neighborhood_Timberland <dbl>, Neighborhood_Northridge <dbl>,
#> # Neighborhood_Stone_Brook <dbl>,
#> # Neighborhood_South_and_West_of_Iowa_State_University <dbl>,
#> # Neighborhood_Clear_Creek <dbl>, Neighborhood_Meadow_Village <dbl>,
#> # Neighborhood_Briardale <dbl>, Neighborhood_Bloomington_Heights <dbl>,
#> # Neighborhood_Veenker <dbl>, Neighborhood_Northpark_Villa <dbl>,
#> # Neighborhood_Blueste <dbl>, Neighborhood_Greens <dbl>,
#> # Neighborhood_Green_Hills <dbl>, Neighborhood_Landmark <dbl>,
#> # Neighborhood_Hayden_Lake <dbl>, Bldg_Type_TwoFmCon <dbl>,
#> # Bldg_Type_Duplex <dbl>, Bldg_Type_Twnhs <dbl>, Bldg_Type_TwnhsE <dbl>
## Log step not skipped: juice() returns the processed training set from the
## prepped recipe, so Sale_Price appears on the log10 scale here.
ames_rec %>%
  prep() %>%
  juice()
#> # A tibble: 2,199 x 35
#> Gr_Liv_Area Year_Built Sale_Price Neighborhood_Co… Neighborhood_Ol…
#> <int> <int> <dbl> <dbl> <dbl>
#> 1 1656 1960 5.33 0 0
#> 2 896 1961 5.02 0 0
#> 3 1329 1958 5.24 0 0
#> 4 1604 1998 5.29 0 0
#> 5 1338 2001 5.33 0 0
#> 6 1280 1992 5.28 0 0
#> 7 1616 1995 5.37 0 0
#> 8 1804 1999 5.28 0 0
#> 9 1655 1993 5.25 0 0
#> 10 1187 1992 5.27 0 0
#> # … with 2,189 more rows, and 30 more variables: Neighborhood_Edwards <dbl>,
#> # Neighborhood_Somerset <dbl>, Neighborhood_Northridge_Heights <dbl>,
#> # Neighborhood_Gilbert <dbl>, Neighborhood_Sawyer <dbl>,
#> # Neighborhood_Northwest_Ames <dbl>, Neighborhood_Sawyer_West <dbl>,
#> # Neighborhood_Mitchell <dbl>, Neighborhood_Brookside <dbl>,
#> # Neighborhood_Crawford <dbl>, Neighborhood_Iowa_DOT_and_Rail_Road <dbl>,
#> # Neighborhood_Timberland <dbl>, Neighborhood_Northridge <dbl>,
#> # Neighborhood_Stone_Brook <dbl>,
#> # Neighborhood_South_and_West_of_Iowa_State_University <dbl>,
#> # Neighborhood_Clear_Creek <dbl>, Neighborhood_Meadow_Village <dbl>,
#> # Neighborhood_Briardale <dbl>, Neighborhood_Bloomington_Heights <dbl>,
#> # Neighborhood_Veenker <dbl>, Neighborhood_Northpark_Villa <dbl>,
#> # Neighborhood_Blueste <dbl>, Neighborhood_Greens <dbl>,
#> # Neighborhood_Green_Hills <dbl>, Neighborhood_Landmark <dbl>,
#> # Neighborhood_Hayden_Lake <dbl>, Bldg_Type_TwoFmCon <dbl>,
#> # Bldg_Type_Duplex <dbl>, Bldg_Type_Twnhs <dbl>, Bldg_Type_TwnhsE <dbl>
## Log step skipped: bake() on the test data also leaves Sale_Price
## untransformed, for the same reason as with the training data.
ames_rec %>%
  prep() %>%
  bake(new_data = ames_test)
#> # A tibble: 731 x 35
#> Gr_Liv_Area Year_Built Sale_Price Neighborhood_Co… Neighborhood_Ol…
#> <int> <int> <int> <dbl> <dbl>
#> 1 2110 1968 244000 0 0
#> 2 1629 1997 189900 0 0
#> 3 1341 1990 171500 0 0
#> 4 1856 2010 394432 0 0
#> 5 1844 1977 190000 0 0
#> 6 1173 1974 170000 0 0
#> 7 1056 1968 142000 0 0
#> 8 864 1971 115000 0 0
#> 9 1704 2007 306000 0 0
#> 10 1822 2005 259000 0 0
#> # … with 721 more rows, and 30 more variables: Neighborhood_Edwards <dbl>,
#> # Neighborhood_Somerset <dbl>, Neighborhood_Northridge_Heights <dbl>,
#> # Neighborhood_Gilbert <dbl>, Neighborhood_Sawyer <dbl>,
#> # Neighborhood_Northwest_Ames <dbl>, Neighborhood_Sawyer_West <dbl>,
#> # Neighborhood_Mitchell <dbl>, Neighborhood_Brookside <dbl>,
#> # Neighborhood_Crawford <dbl>, Neighborhood_Iowa_DOT_and_Rail_Road <dbl>,
#> # Neighborhood_Timberland <dbl>, Neighborhood_Northridge <dbl>,
#> # Neighborhood_Stone_Brook <dbl>,
#> # Neighborhood_South_and_West_of_Iowa_State_University <dbl>,
#> # Neighborhood_Clear_Creek <dbl>, Neighborhood_Meadow_Village <dbl>,
#> # Neighborhood_Briardale <dbl>, Neighborhood_Bloomington_Heights <dbl>,
#> # Neighborhood_Veenker <dbl>, Neighborhood_Northpark_Villa <dbl>,
#> # Neighborhood_Blueste <dbl>, Neighborhood_Greens <dbl>,
#> # Neighborhood_Green_Hills <dbl>, Neighborhood_Landmark <dbl>,
#> # Neighborhood_Hayden_Lake <dbl>, Bldg_Type_TwoFmCon <dbl>,
#> # Bldg_Type_Duplex <dbl>, Bldg_Type_Twnhs <dbl>, Bldg_Type_TwnhsE <dbl>

# Created on 2020-08-25 by the reprex package (v0.3.0.9001)