Skip to content

Instantly share code, notes, and snippets.

@ericfourrier
Created February 7, 2016 20:25
Show Gist options
  • Save ericfourrier/9560eb9d4d5355cc41ac to your computer and use it in GitHub Desktop.
Save ericfourrier/9560eb9d4d5355cc41ac to your computer and use it in GitHub Desktop.
# Load library ------------------------------------------------------------
library(dplyr)
# Basic functions to explore NA in Dataframe --------------------------------------------------------------
# Count the missing (NA) values in each column, with the corresponding fraction
countNaCol <- function(data) {
  # Summarise missingness column by column.
  #
  # Args:
  #   data: a data frame.
  # Returns:
  #   A data frame with one row per column of `data` (row names are the
  #   column names) and two columns:
  #     Nbna         - number of NA values in the column
  #     napercentage - Nbna / nrow(data), rounded to 3 decimals; despite the
  #                    name this is a fraction in [0, 1], not a percentage
  na_counts <- vapply(data, function(column) sum(is.na(column)), integer(1))
  na_share <- round(na_counts / nrow(data), 3)
  data.frame(Nbna = na_counts, napercentage = na_share)
}
# Identify columns that contain only NA values
naColumns <- function(data) {
  # Names of the columns of `data` whose NA count equals nrow(data),
  # i.e. columns consisting entirely of missing values.
  #
  # Args:
  #   data: a data frame.
  # Returns:
  #   A character vector of column names (empty if no column qualifies).
  n_rows <- nrow(data)
  is_all_na <- vapply(
    data,
    function(column) sum(is.na(column)) == n_rows,
    logical(1)
  )
  names(data)[is_all_na]
}
# Identify columns where more than a fraction `a` of the values are NA
naColumnsM <- function(data, a = 0.7) {
  # Names of the columns of `data` whose NA count is strictly greater than
  # a * nrow(data).
  #
  # Args:
  #   data: a data frame.
  #   a:    fraction of rows used as the cut-off (default 0.7).
  # Returns:
  #   A character vector of column names (empty if no column qualifies).
  threshold <- a * nrow(data)
  mostly_na <- vapply(
    data,
    function(column) sum(is.na(column)) > threshold,
    logical(1)
  )
  names(data)[mostly_na]
}
# Fraction of NA values in each row (0 = complete row, 1 = row with only NA values)
countNaRow <- function(data) {
  # Fraction of missing values in each row of `data`.
  #
  # Args:
  #   data: a data frame.
  # Returns:
  #   A numeric vector with one element per row: the number of NA cells in
  #   that row divided by ncol(data) (0 = no NA, 1 = every cell is NA).
  #
  # is.na() on a data frame yields a logical matrix, so base rowSums() is
  # enough here; this avoids dplyr's deprecated mutate_each()/funs().
  rowSums(is.na(data)) / ncol(data)
}
naRows <- function(data) {
  # Indices of the rows of `data` for which countNaRow() equals 1,
  # i.e. rows in which every value is NA.
  na_fraction <- countNaRow(data)
  which(na_fraction == 1)
}
# Indices of all complete rows (rows with no NA values)
completeRows <- function(data) {
  # Indices of the rows of `data` for which countNaRow() equals 0,
  # i.e. rows that contain no NA value at all.
  na_fraction <- countNaRow(data)
  which(na_fraction == 0)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment