diamonaj · January 13, 2023 17:39
diff --git a/Lesson1_pcw_answers b/Lesson1_pcw_answers
 #############
 # Answer key to pre-class work for Lesson 1


 #***Step 1:

 #Load dataset 
 #Note: This step may take several seconds to complete
 # click the link for the data set and see that it's a .csv file
 # so use "read.csv" -- also, don't name this data object "data"
 # because "data()" is a built-in R function. Choose another name.
 # mm <- read.csv("https://tinyurl.com/UNpckpdata")
 # head(mm)
 # Pull the CSV straight from the short link; the first row is the header.
 un_data <- read.csv(file = "https://tinyurl.com/UNpckpdata", header = TRUE)
 # Preview the first six rows to sanity-check the import.
 head(un_data, n = 6L)
 # Report the size of the raw data: row count first, then column count.
 nrow(x = un_data)
 ncol(x = un_data)
 # the original data contains 147631 rows and 9 columns 

 #***Step 2
 #Check each column for missing values and/or NAs and drop the observations that have them.
 #HINT: Use the function na.omit after dealing with all the blanks by setting them to NAs. 
 #for example: https://www.youtube.com/watch?v=O_gPPrezk5o
 # Keep only the rows that contain no NA in any column.
 # NOTE(review): na.omit() treats only NA as missing; blank strings ("") are
 # kept. The hint asks for blanks to be recoded to NA first -- confirm whether
 # that step is needed for this data set.
 clean_data <- na.omit(object = un_data)
 # Rows left after dropping incomplete observations (recorded result: 147626)
 nrow(x = clean_data)

 # Column count of the cleaned data (recorded result: 9)
 ncol(x = clean_data)

 #### How many observations (rows) are there in the original data set (before removing NAs)?
 #147631 

 #### How many observations (rows) are there in the cleaned data set (after removing NAs)?
 # 147626

 #### How many columns are there, and what are the names of the first column and the last column?
 # 9 columns

 # names(clean_data)
 #[1] "Contribution_ID"      "ISOCode3"             "M49_Code"             "Contributing_Country"
 #[5] "Mission_Acronym"      "Personnel_Type"       "Female_Personnel"     "Male_Personnel"      
 #[9] "Last_Reporting_Date" 

 # First column name is "Contribution_ID"
 # Last column name is "Last_Reporting_Date"
	#############
	# Answer key to pre-class work for Lesson 1


	#***Step 1:

	#Load dataset
	#Note: This step may take several seconds to complete
	# click the link for the data set and see that it's a .csv file
	# so use "read.csv" -- also, don't name this data object "data"
	# because "data()" is a built-in R function. Choose another name.
	# mm <- read.csv("https://tinyurl.com/UNpckpdata")
	# head(mm)
	# Pull the CSV straight from the short link; the first row is the header.
	un_data <- read.csv(file = "https://tinyurl.com/UNpckpdata", header = TRUE)
	# Preview the first six rows to sanity-check the import.
	head(un_data, n = 6L)
	# Report the size of the raw data: row count first, then column count.
	nrow(x = un_data)
	ncol(x = un_data)
	# the original data contains 147631 rows and 9 columns

	#***Step 2
	#Check each column for missing values and/or NAs and drop the observations that have them.
	#HINT: Use the function na.omit after dealing with all the blanks by setting them to NAs.
	#for example: https://www.youtube.com/watch?v=O_gPPrezk5o
	# Keep only the rows that contain no NA in any column.
	# NOTE(review): na.omit() treats only NA as missing; blank strings ("") are
	# kept. The hint asks for blanks to be recoded to NA first -- confirm whether
	# that step is needed for this data set.
	clean_data <- na.omit(object = un_data)
	# Rows left after dropping incomplete observations (recorded result: 147626)
	nrow(x = clean_data)

	# Column count of the cleaned data (recorded result: 9)
	ncol(x = clean_data)

	#### How many observations (rows) are there in the original data set (before removing NAs)?
	#147631

	#### How many observations (rows) are there in the cleaned data set (after removing NAs)?
	# 147626

	#### How many columns are there, and what are the names of the first column and the last column?
	# 9 columns

	# names(clean_data)
	#[1] "Contribution_ID" "ISOCode3" "M49_Code" "Contributing_Country"
	#[5] "Mission_Acronym" "Personnel_Type" "Female_Personnel" "Male_Personnel"
	#[9] "Last_Reporting_Date"

	# First column name is "Contribution_ID"
	# Last column name is "Last_Reporting_Date"
No results found