MattSandy · December 12, 2018 23:03
diff --git a/run.R b/run.R
 # Word-usage comparison for the first Trump-Clinton presidential debate.
 # Transcript source:
 # https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/
 #
 # Input:  a headerless two-column CSV (speaker, statement).
 # Output: export_table.csv (words one candidate used more than twice as often
 #         as the other) and transcript_melted.csv (one row per spoken word).
 transcript <- read.csv(
   file = "~/R/reddit/speach/import/transcript.csv",
   header = FALSE,
   stringsAsFactors = FALSE
 )
 colnames(transcript) <- c("candidate", "statement")

 # Strip everything except letters, digits, spaces and apostrophes, then split
 # each statement on single spaces. Building the long table in one step avoids
 # growing a matrix with rbind() once per word.
 tokens <- strsplit(gsub("[^[:alnum:] \']", "", transcript$statement), " ")
 transcript_melted <- data.frame(
   candidate = rep(transcript$candidate, lengths(tokens)),
   word = as.character(unlist(tokens)),
   stringsAsFactors = FALSE
 )

 # Drop empty tokens (left behind by consecutive spaces) and missing values.
 # Tokens cannot contain a space after the split, so no separate " " filter.
 has_word <- !is.na(transcript_melted$word) & nzchar(transcript_melted$word)
 transcript_melted <- transcript_melted[has_word, ]

 transcript_melted$word <- tolower(transcript_melted$word)

 # Remove Lester Holt's rows; which() also drops rows with a missing speaker.
 is_holt <- transcript_melted$candidate == "LESTER HOLT"
 transcript_melted <- transcript_melted[which(!is_holt), ]
 transcript_melted$candidate <- factor(transcript_melted$candidate)

 # Frequency of every candidate/word pair, most frequent first. The unnamed
 # table() arguments produce the Var1 (candidate), Var2 (word) and Freq columns
 # that the lookups at the bottom of the script rely on.
 summary <- data.frame(
   table(transcript_melted$candidate, transcript_melted$word)
 )
 summary <- summary[order(-summary$Freq), ]
 # head() rather than [1:200, ] so a short transcript does not print NA rows.
 print(head(summary, 200), row.names = FALSE)

 # Per-candidate counts for every distinct word, in order of first appearance.
 # A candidate label that never occurs simply yields all-zero counts.
 words <- unique(transcript_melted$word)
 word_id <- match(transcript_melted$word, words)
 clinton_counts <- tabulate(
   word_id[transcript_melted$candidate == "CLINTON"],
   nbins = length(words)
 )
 trump_counts <- tabulate(
   word_id[transcript_melted$candidate == "TRUMP"],
   nbins = length(words)
 )

 # Find some differences: words one candidate used more than twice as often as
 # the other. A zero count gives a ratio of Inf, so words only one of them used
 # qualify. Words neither of them said (possible when the transcript has other
 # speakers) give NaN; which() skips those instead of letting if() fail on NA.
 trump_ratio <- trump_counts / clinton_counts
 lopsided <- which(clinton_counts / trump_counts > 2 | trump_ratio > 2)

 export_rows <- vector("list", length(lopsided))
 for (k in seq_along(lopsided)) {
   i <- lopsided[k]
   print(words[i])
   # number of times the word has been said by each candidate
   print(table(transcript_melted$candidate[word_id == i]))
   export_rows[[k]] <- rbind(
     c("TRUMP", words[i], trump_counts[i], trump_ratio[i]),
     c("CLINTON", words[i], clinton_counts[i], trump_ratio[i])
   )
 }
 # Columns: candidate, word, that candidate's count, Trump/Clinton ratio.
 # Starting from the empty 4-column matrix keeps the shape when nothing matched.
 export_table <- do.call(
   rbind,
   c(list(matrix(nrow = 0, ncol = 4)), export_rows)
 )

 # Clinton said more: words both candidates used, but Clinton more often.
 for (i in which(clinton_counts > trump_counts & trump_counts > 0)) {
   print(words[i])
   print(table(transcript_melted$candidate[word_id == i]))
 }

 write.csv(
   export_table,
   file = "~/R/reddit/speach/export/export_table.csv",
   row.names = FALSE
 )
 write.csv(
   transcript_melted,
   file = "~/R/reddit/speach/export/transcript_melted.csv",
   row.names = FALSE
 )

 # fun bits
 # How many words Trump said relative to Hillary
 sum(transcript_melted$candidate == "TRUMP") /
   sum(transcript_melted$candidate == "CLINTON")
 # Tremendous
 summary[which(summary$Var2 == "tremendous"), ]
 summary[which(summary$Var2 == "very"), ]
 summary[which(summary$Var2 == "important"), ]
 summary[which(summary$Var2 == "wrong"), ]
	# Word-usage comparison for the first Trump-Clinton presidential debate.
	# Transcript source:
	# https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/
	#
	# Input:  a headerless two-column CSV (speaker, statement).
	# Output: export_table.csv (words one candidate used more than twice as often
	#         as the other) and transcript_melted.csv (one row per spoken word).
	transcript <- read.csv(
	  file = "~/R/reddit/speach/import/transcript.csv",
	  header = FALSE,
	  stringsAsFactors = FALSE
	)
	colnames(transcript) <- c("candidate", "statement")

	# Strip everything except letters, digits, spaces and apostrophes, then split
	# each statement on single spaces. Building the long table in one step avoids
	# growing a matrix with rbind() once per word.
	tokens <- strsplit(gsub("[^[:alnum:] \']", "", transcript$statement), " ")
	transcript_melted <- data.frame(
	  candidate = rep(transcript$candidate, lengths(tokens)),
	  word = as.character(unlist(tokens)),
	  stringsAsFactors = FALSE
	)

	# Drop empty tokens (left behind by consecutive spaces) and missing values.
	# Tokens cannot contain a space after the split, so no separate " " filter.
	has_word <- !is.na(transcript_melted$word) & nzchar(transcript_melted$word)
	transcript_melted <- transcript_melted[has_word, ]

	transcript_melted$word <- tolower(transcript_melted$word)

	# Remove Lester Holt's rows; which() also drops rows with a missing speaker.
	is_holt <- transcript_melted$candidate == "LESTER HOLT"
	transcript_melted <- transcript_melted[which(!is_holt), ]
	transcript_melted$candidate <- factor(transcript_melted$candidate)

	# Frequency of every candidate/word pair, most frequent first. The unnamed
	# table() arguments produce the Var1 (candidate), Var2 (word) and Freq columns
	# that the lookups at the bottom of the script rely on.
	summary <- data.frame(
	  table(transcript_melted$candidate, transcript_melted$word)
	)
	summary <- summary[order(-summary$Freq), ]
	# head() rather than [1:200, ] so a short transcript does not print NA rows.
	print(head(summary, 200), row.names = FALSE)

	# Per-candidate counts for every distinct word, in order of first appearance.
	# A candidate label that never occurs simply yields all-zero counts.
	words <- unique(transcript_melted$word)
	word_id <- match(transcript_melted$word, words)
	clinton_counts <- tabulate(
	  word_id[transcript_melted$candidate == "CLINTON"],
	  nbins = length(words)
	)
	trump_counts <- tabulate(
	  word_id[transcript_melted$candidate == "TRUMP"],
	  nbins = length(words)
	)

	# Find some differences: words one candidate used more than twice as often as
	# the other. A zero count gives a ratio of Inf, so words only one of them used
	# qualify. Words neither of them said (possible when the transcript has other
	# speakers) give NaN; which() skips those instead of letting if() fail on NA.
	trump_ratio <- trump_counts / clinton_counts
	lopsided <- which(clinton_counts / trump_counts > 2 | trump_ratio > 2)

	export_rows <- vector("list", length(lopsided))
	for (k in seq_along(lopsided)) {
	  i <- lopsided[k]
	  print(words[i])
	  # number of times the word has been said by each candidate
	  print(table(transcript_melted$candidate[word_id == i]))
	  export_rows[[k]] <- rbind(
	    c("TRUMP", words[i], trump_counts[i], trump_ratio[i]),
	    c("CLINTON", words[i], clinton_counts[i], trump_ratio[i])
	  )
	}
	# Columns: candidate, word, that candidate's count, Trump/Clinton ratio.
	# Starting from the empty 4-column matrix keeps the shape when nothing matched.
	export_table <- do.call(
	  rbind,
	  c(list(matrix(nrow = 0, ncol = 4)), export_rows)
	)

	# Clinton said more: words both candidates used, but Clinton more often.
	for (i in which(clinton_counts > trump_counts & trump_counts > 0)) {
	  print(words[i])
	  print(table(transcript_melted$candidate[word_id == i]))
	}

	write.csv(
	  export_table,
	  file = "~/R/reddit/speach/export/export_table.csv",
	  row.names = FALSE
	)
	write.csv(
	  transcript_melted,
	  file = "~/R/reddit/speach/export/transcript_melted.csv",
	  row.names = FALSE
	)

	# fun bits
	# How many words Trump said relative to Hillary
	sum(transcript_melted$candidate == "TRUMP") /
	  sum(transcript_melted$candidate == "CLINTON")
	# Tremendous
	summary[which(summary$Var2 == "tremendous"), ]
	summary[which(summary$Var2 == "very"), ]
	summary[which(summary$Var2 == "important"), ]
	summary[which(summary$Var2 == "wrong"), ]