This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
su - root | |
# gpinitsystem fails to change the postgresql.conf port setting without ed | |
yum install ed unzip -y | |
groupadd -g 8000 gpadmin | |
useradd -m -s /bin/bash -d /home/gpadmin -g gpadmin -u 8000 gpadmin | |
passwd gpadmin | |
mkdir -p /data/master |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the raw training data and replace missing values with NA | |
training.data.raw <- read.csv('train.csv',header=T,na.strings=c("")) | |
# Output the number of missing values for each column | |
sapply(training.data.raw,function(x) sum(is.na(x))) | |
# Quick check for how many different values for each feature | |
sapply(training.data.raw, function(x) length(unique(x))) | |
# A visual way to check for missing data |