arcaravaggi · January 31, 2018 11:51
diff --git a/maxDup_NAs.R b/maxDup_NAs.R
 # Build a data frame of example data (here derived from raptor growth metrics).
 # NA marks a missing value in one of the three metric columns.
 metric_cols <- c("age", "wing", "mass")
 df <- matrix(
   c(
     24,  NA, 365,
      1,  NA,  NA,
      6,  33,  NA,
     10,  59,  NA,
     37, 300, 477,
     NA, 233, 312,
     NA,  NA, 450,
      4,  28,  49
   ),
   nrow = 8, ncol = length(metric_cols), byrow = TRUE,
   dimnames = list(NULL, metric_cols)
 )
 df <- as.data.frame(df)

 # Grouping column (location) and an extra descriptive column (year).
 df$loc <- c("Flev", "Flev", "Flev", "Ters", "Ters", "Schi", "Schi", "Schi")
 df$yr <- c("2004", "2004", "2004", "2007", "2007", "2004", "2004", "2008")
 df

 # Split the data by the focal column (`loc`) and collapse each group to a
 # single row: the one with the most non-missing values across the metric
 # columns.
 keep_most_complete <- function(x) {
   # Count the non-NA cells per row, then keep the best-populated row.
   n_present <- rowSums(!is.na(x[metric_cols]))
   x[which.max(n_present), ]
 }
 res <- do.call(rbind, lapply(split(df, df$loc), keep_most_complete))
 row.names(res) <- NULL # drop the group names that rbind() adds as row names
 res

 # Note that this is not perfect: ties are broken by row order, not by the
 # values themselves. `which.max` returns the index of the FIRST maximum, so
 # when several rows of a group hold the same number of non-missing cells the
 # earliest one is kept. In this example location 'Flev' has two rows with two
 # populated cells each, and the first of them (age 24, mass 365) is retained.
	# Build a data frame of example data (here derived from raptor growth metrics).
	# NA marks a missing value in one of the three metric columns.
	metric_cols <- c("age", "wing", "mass")
	df <- matrix(
	  c(
	    24,  NA, 365,
	     1,  NA,  NA,
	     6,  33,  NA,
	    10,  59,  NA,
	    37, 300, 477,
	    NA, 233, 312,
	    NA,  NA, 450,
	     4,  28,  49
	  ),
	  nrow = 8, ncol = length(metric_cols), byrow = TRUE,
	  dimnames = list(NULL, metric_cols)
	)
	df <- as.data.frame(df)

	# Grouping column (location) and an extra descriptive column (year).
	df$loc <- c("Flev", "Flev", "Flev", "Ters", "Ters", "Schi", "Schi", "Schi")
	df$yr <- c("2004", "2004", "2004", "2007", "2007", "2004", "2004", "2008")
	df

	# Split the data by the focal column (`loc`) and collapse each group to a
	# single row: the one with the most non-missing values across the metric
	# columns.
	keep_most_complete <- function(x) {
	  # Count the non-NA cells per row, then keep the best-populated row.
	  n_present <- rowSums(!is.na(x[metric_cols]))
	  x[which.max(n_present), ]
	}
	res <- do.call(rbind, lapply(split(df, df$loc), keep_most_complete))
	row.names(res) <- NULL # drop the group names that rbind() adds as row names
	res

	# Note that this is not perfect: ties are broken by row order, not by the
	# values themselves. `which.max` returns the index of the FIRST maximum, so
	# when several rows of a group hold the same number of non-missing cells the
	# earliest one is kept. In this example location 'Flev' has two rows with two
	# populated cells each, and the first of them (age 24, mass 365) is retained.