- Make sure you are in the folder that contains the Dockerfile.
- If your folder is `/my-docker-image/`, there should be 2 files in it:

      /my-docker-image
      |---Dockerfile
      |---requirements.txt

| # library() (unlike require()) stops with an error right away if GGally is missing. | |
| library(GGally) | |
| #' Scatter plot with a LOESS smoother and a linear-model smoother overlaid. | |
| #' | |
| #' Presumably intended as a custom panel function for GGally (the signature | |
| #' `data, mapping, ...` suggests so) -- confirm against the caller. | |
| #' | |
| #' @param data Data frame passed to `ggplot()`. | |
| #' @param mapping Aesthetic mapping passed to `ggplot()`. | |
| #' @param ... Accepted for caller compatibility; not used by this function. | |
| #' @return The assembled ggplot object. | |
| lm.plt <- function(data, mapping, ...) { | |
|   ggplot(data = data, mapping = mapping) + | |
|     geom_point(shape = 20, alpha = 0.7, color = "darkseagreen") + | |
|     # Red: LOESS fit; blue: straight-line (lm) fit, drawn on top. | |
|     geom_smooth(method = loess, fill = "red", color = "red") + | |
|     geom_smooth(method = lm, fill = "blue", color = "blue") + | |
|     theme_minimal() | |
| } |
| # Packages this project expects to be available. | |
| list.of.packages <- c( | |
|   "ggplot2", "parallel", "tidyverse", "pROC", "caret", | |
|   "corrplot", "doParallel", "dummies", "futile.logger" | |
| ) | |
| # Keep only the names that do not appear in the installed-package table. | |
| new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] | |
| # Install whatever is missing. The mirror is addressed over HTTPS so that | |
| # package downloads cannot be altered in transit. | |
| if (length(new.packages) > 0) { | |
|   print("Installing new packages") | |
|   install.packages(new.packages, repos = "https://cloud.r-project.org") | |
| } |
| # Run the package setup script (its contents are not shown here). | |
| source("./src/load_package.R") | |
| flog.info("Loading the German Credit Card Dataset") | |
| # Read the raw data file; "," is declared as the decimal mark. | |
| german_credit <- read.table( | |
|   file = "./assets/data/german.data", | |
|   dec = ",", | |
|   fileEncoding = "UTF-8" | |
| ) | |
| head(german_credit) | |
| flog.info("Renaming the Columns") | |
| # Replace the default column names with 21 descriptive ones, in file order. | |
| names(german_credit) <- c( | |
|   "status", "duration", "credit_history", "purpose", "credit_amount", | |
|   "savings_account", "employment", "installment_rate", "status_sex", | |
|   "guarantors", "residence", "property", "age", "other_installment", | |
|   "housing", "existing_credits", "job", "maintainence_people", | |
|   "telephone", "foreign", "rating" | |
| ) |
| # Run the EDA script first (presumably it produces the intermediate file read | |
| # below -- confirm against ./src/eda.R). | |
| source("./src/eda.R") | |
| flog.info("Loading the intermediate data") | |
| # Read the intermediate dataset back from its RDS file. | |
| german_credit <- readRDS( | |
|   "./assets/intermediate-files/intermediate_german_data.rds" | |
| ) | |
| # Recode the target column: 1 becomes "good", any other non-missing value "bad". | |
| german_credit[["rating"]] <- ifelse( | |
|   german_credit[["rating"]] == 1, | |
|   "good", | |
|   "bad" | |
| ) | |
| # Optional check for missing values per column (left disabled): | |
| # unlist(lapply(german_credit, function(x) sum(is.na(x)))) |
| #setwd("~/difference-engine/docker-for-data-science-r/") | |
| # Run the feature-engineering / training prep script (contents not shown here). | |
| source("./src/fe-train.R") | |
| # Fixed seed so that random steps give the same results on every run. | |
| set.seed(42) | |
| # Parallelizing the modelling | |
| # NOTE: Try not to use all the cores | |
| # Leave two cores free, but never request fewer than one worker: | |
| # detectCores() can return NA or a value <= 2 (e.g. in small containers), | |
| # which would otherwise yield an invalid worker count. | |
| doParallel::registerDoParallel(max(1L, parallel::detectCores() - 2L, na.rm = TRUE)) | |
| # Write the ML Code here |
| # Start from the rocker/tidyverse image. | |
| FROM rocker/tidyverse | |
| # Refresh the package index, upgrade, then install the build toolchain and | |
| # development headers (SSL, libffi, XML, curl). | |
| RUN apt-get update && apt-get -y upgrade && apt-get install -y \ | |
|     build-essential libssl-dev libffi-dev libxml2-dev libcurl4-openssl-dev | |
| # Directories that are exposed as volumes below. | |
| RUN mkdir /home/rstudio/data /home/rstudio/models | |
| # The JSON-array form of VOLUME requires double quotes. With single quotes the | |
| # line is parsed as shell form and the brackets/quotes end up in the paths. | |
| VOLUME ["/home/rstudio/data", "/home/rstudio/models"] | |
| # Extra R packages on top of what the base image already ships. | |
| RUN Rscript -e "install.packages(c('dummy', 'corrplot', 'pROC'), dependencies=TRUE)" |
| # Start from the rocker/r-base image. | |
| FROM rocker/r-base | |
| # Refresh the package index, upgrade, then install the build toolchain and | |
| # development headers (SSL, libffi, XML, curl). | |
| RUN apt-get update \ | |
|     && apt-get -y upgrade \ | |
|     && apt-get install -y \ | |
|         build-essential libssl-dev libffi-dev libxml2-dev libcurl4-openssl-dev | |
| # R packages installed at image build time. | |
| RUN Rscript -e "install.packages(c('caret', 'tidyverse', 'gbm', 'pROC', 'corrplot', 'doParallel', 'dummies', 'futile.logger'), dependencies=TRUE)" | |
| # Location inside the image where the project directory is created. | |
| ENV INSTALL_PATH=/germancc | |
| RUN mkdir -p "$INSTALL_PATH" |
| def timeit(method): | |
| def timing(*args, **kwargs): | |
| timings = [] | |
| print("Running this 1000 loops, for benchmarking") | |
| for i in range(1000): | |
| start = time.time() | |
| result = method(*args, **kwargs) | |
| end = time.time() | |