fformenti · June 19, 2014 07:21
diff --git a/billionaires.R b/billionaires.R
 # Source: http://www.forbes.com/billionaires/list/#tab:overall
 # DateTaken: March 4th, 2014

 # Directories for the raw input and for the cleaned output.
 data_path_original <- "../data/original/"
 data_path <- "../data/"


 # Load Data
 # read.delim() shares read.csv()'s defaults except for sep = "\t", so the
 # tab-separated file with a header row is parsed the same way.
 df <- read.delim(
   paste0(data_path_original, "billionaires.txt"),
   stringsAsFactors = FALSE
 )
 # Keep at most the first 1645 rows.
 # NOTE(review): presumably the number of list entries on the snapshot date;
 # confirm against the source file.
 df <- head(df, n = 1645L)



 # DataCleanup
 # Rename column 6 to "Country" and normalise its labels.
 names(df)[6] <- "Country"
 # fixed = TRUE matches every pattern as a literal string instead of a
 # regular expression. The substitutions run in their original order.
 df$Country <- gsub("ChinaRank", "China", df$Country, fixed = TRUE)
 df$Country <- gsub("Czech Republic", "Czech Rep.", df$Country, fixed = TRUE)
 df$Country <- gsub("Vietnam", "Viet Nam", df$Country, fixed = TRUE)
 # "United Arab Emirates" was previously replaced by itself, which changed
 # nothing, so that substitution has been removed.
 # Macau and Hong Kong are folded into China.
 df$Country <- gsub("Macau", "China", df$Country, fixed = TRUE)
 df$Country <- gsub("Hong Kong", "China", df$Country, fixed = TRUE)


 # Convert NetWorth to numeric: drop the "$" sign, keep only the text before
 # the first space, then coerce.
 df$NetWorth <- gsub("$", "", df$NetWorth, fixed = TRUE)
 # vapply() returns a plain character vector, so the column never becomes a
 # list column. An empty split result yields NA_character_.
 df$NetWorth <- vapply(
   strsplit(df$NetWorth, " ", fixed = TRUE),
   function(parts) parts[1],
   character(1),
   USE.NAMES = FALSE
 )
 df$NetWorth <- as.numeric(df$NetWorth)


 # Writing data
 # write.csv2() writes ";"-separated values with "," as the decimal mark.
 # FALSE is spelled out because F is an ordinary variable that can be
 # reassigned.
 write.csv2(
   df,
   file = paste0(data_path, "Master_dataframe.csv"),
   row.names = FALSE
 )
	# Source: http://www.forbes.com/billionaires/list/#tab:overall
	# DateTaken: March 4th, 2014

	# Directories for the raw input and for the cleaned output.
	data_path_original <- "../data/original/"
	data_path <- "../data/"


	# Load Data
	# read.delim() shares read.csv()'s defaults except for sep = "\t", so the
	# tab-separated file with a header row is parsed the same way.
	df <- read.delim(
		paste0(data_path_original, "billionaires.txt"),
		stringsAsFactors = FALSE
	)
	# Keep at most the first 1645 rows.
	# NOTE(review): presumably the number of list entries on the snapshot date;
	# confirm against the source file.
	df <- head(df, n = 1645L)



	# DataCleanup
	# Rename column 6 to "Country" and normalise its labels.
	names(df)[6] <- "Country"
	# fixed = TRUE matches every pattern as a literal string instead of a
	# regular expression. The substitutions run in their original order.
	df$Country <- gsub("ChinaRank", "China", df$Country, fixed = TRUE)
	df$Country <- gsub("Czech Republic", "Czech Rep.", df$Country, fixed = TRUE)
	df$Country <- gsub("Vietnam", "Viet Nam", df$Country, fixed = TRUE)
	# "United Arab Emirates" was previously replaced by itself, which changed
	# nothing, so that substitution has been removed.
	# Macau and Hong Kong are folded into China.
	df$Country <- gsub("Macau", "China", df$Country, fixed = TRUE)
	df$Country <- gsub("Hong Kong", "China", df$Country, fixed = TRUE)


	# Convert NetWorth to numeric: drop the "$" sign, keep only the text before
	# the first space, then coerce.
	df$NetWorth <- gsub("$", "", df$NetWorth, fixed = TRUE)
	# vapply() returns a plain character vector, so the column never becomes a
	# list column. An empty split result yields NA_character_.
	df$NetWorth <- vapply(
		strsplit(df$NetWorth, " ", fixed = TRUE),
		function(parts) parts[1],
		character(1),
		USE.NAMES = FALSE
	)
	df$NetWorth <- as.numeric(df$NetWorth)


	# Writing data
	# write.csv2() writes ";"-separated values with "," as the decimal mark.
	# FALSE is spelled out because F is an ordinary variable that can be
	# reassigned.
	write.csv2(
		df,
		file = paste0(data_path, "Master_dataframe.csv"),
		row.names = FALSE
	)