MattSandy’s gists

MattSandy / app.js

Created June 2, 2016 18:46

Reddit Scraper

	var http = require('http');
	var https = require('https');
	var fs = require('fs');
	// Accumulators declared up front; both start out empty.
	var post_array = [];
	var user_array = [];

	// (Re)create each CSV so that it contains only its header row.
	// fs.writeFile replaces the file when it already exists.
	// A failed write is reported instead of being logged as 'done'.
	fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
		if (err) {
			console.error('Failed to write posts.csv header:', err);
			return;
		}
		console.log('done');
	});
	fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
		if (err) {
			console.error('Failed to write users.csv header:', err);
			return;
		}
		console.log('done');
	});

	var subreddits = ["AskReddit", "politics", "The_Donald", "funny", "nba", "Overwatch", "gaming", "pics", "news", "todayilearned", "videos", "movies", "worldnews", "soccer", "relationships", "WTF", "SandersForPresident", "AdviceAnimals", "gifs", "leagueoflegends", "aww", "BlackPeopleTwitter", "hockey", "SquaredCircle", "gameofthrones", "nfl", "GlobalOffensive", "h3h3productions", "arrow", "DotA2", "pcmasterrace", "hiphopheads", "me_irl", "Showerthoughts", "science", "Mr_Trump", "Games", "mildlyinteresting", "asoiaf", "technology", "hillaryclinton", "IAmA", "4chan", "hearthstone", "TumblrInAction", "magicTCG", "Jokes", "PoliticalDiscussion", "tifu", "baseball", "CFB", "enoughsanderss

MattSandy / run.R

Last active June 20, 2016 21:19

Find the Minimum Distance Between Two Points, and Their Coordinates

	# Fixed seed so the ten random (x, y) points are reproducible.
	set.seed(1)
	df <- data.frame(x = rnorm(10), y = rnorm(10))
	# Pairwise distances between all rows of df.
	d1 <- dist(df)

	# Smallest pairwise distance.
	min(d1)
	#0.2036045
	# Position of that smallest distance inside the dist vector.
	which.min(d1)
	#43
	# combn() enumerates row-name pairs in the same order dist() stores them,
	# so the column at the minimum's position names the two closest rows.
	df[combn(row.names(df), 2)[, which.min(d1)], ]
	# x y

MattSandy / run.R

Last active December 12, 2018 23:03

Use R and Tableau to Analyze Text from Presidential Debate

	# Breaks each transcript statement into one (candidate, word) row per word.
	# Presumably the transcript was taken from the article linked below - confirm.
	#https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/
	# The CSV is read without a header row and text is kept as character (not factor).
	transcript <- read.csv(file="~/R/reddit/speach/import/transcript.csv",header = FALSE, stringsAsFactors = FALSE)
	colnames(transcript) <- c("candidate","statement")
	# Empty 0-row, 2-column matrix that the loop below grows one row at a time.
	transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
	for(i in 1:nrow(transcript)) {
	#removes non alphanumeric, then splits statement into a vector of words
	# The pattern keeps letters, digits, spaces and apostrophes; everything else is dropped.
	# Splitting on a single space means consecutive spaces produce empty-string "words".
	words <- unlist(strsplit(gsub("[^[:alnum:] \']", "", transcript[i,"statement"]), " "))
	for(word in words) {
	# Append one row: the speaker of statement i paired with the current word.
	transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
	}

MattSandy / run.R

Last active October 16, 2016 19:37

Second Trump vs. Clinton Debate

	# Breaks each transcript statement into one (candidate, word) row per word.
	# Presumably the transcript was taken from the article linked below - confirm.
	#https://www.washingtonpost.com/news/the-fix/wp/2016/10/09/everything-that-was-said-at-the-second-donald-trump-vs-hillary-clinton-debate-highlighted/
	# The CSV is read without a header row and text is kept as character (not factor).
	transcript <- read.csv(file="~/R/reddit/speach/import/transcript-2nd-debate.csv",header = FALSE, stringsAsFactors = FALSE)
	colnames(transcript) <- c("candidate","statement")
	# Empty 0-row, 2-column matrix that the loop below grows one row at a time.
	transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
	for(i in 1:nrow(transcript)) {
	#removes non alphanumeric, then splits statement into a vector of words
	# The pattern keeps letters, digits, spaces and apostrophes; everything else is dropped.
	# Splitting on a single space means consecutive spaces produce empty-string "words".
	words <- unlist(strsplit(gsub("[^[:alnum:] \']", "", transcript[i,"statement"]), " "))
	for(word in words) {
	# Append one row: the speaker of statement i paired with the current word.
	transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
	}

MattSandy / run.R

Last active October 21, 2016 22:18

Final Presidential Trump vs Clinton Debate

	# Breaks each transcript statement into one (candidate, word) row per word.
	# Presumably the transcript was taken from the article linked below - confirm.
	#https://www.washingtonpost.com/news/the-fix/wp/2016/10/19/the-final-trump-clinton-debate-transcript-annotated/
	# The CSV is read without a header row and text is kept as character (not factor).
	transcript <- read.csv(file="~/R/reddit/speach/import/transcript-3rd-debate.csv",header = FALSE, stringsAsFactors = FALSE)
	colnames(transcript) <- c("candidate","statement")
	# Empty 0-row, 2-column matrix that the loop below grows one row at a time.
	transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
	for(i in 1:nrow(transcript)) {
	#removes non alphanumeric, then splits statement into a vector of words
	# The pattern keeps only letters, digits and spaces, so apostrophes are removed
	# as well (e.g. "don't" becomes "dont").
	# Splitting on a single space means consecutive spaces produce empty-string "words".
	words <- unlist(strsplit(gsub("[^[:alnum:] ]", "", transcript[i,"statement"]), " "))
	for(word in words) {
	# Append one row: the speaker of statement i paired with the current word.
	transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
	}

MattSandy / app.js

Created October 27, 2016 16:43

Reddit Front Page Monitor

	var http = require('http');
	var https = require('https');
	var fs = require('fs');
	// Accumulators declared up front; both start out empty.
	var post_array = [];
	var user_array = [];

	// (Re)create each CSV so that it contains only its header row.
	// fs.writeFile replaces the file when it already exists.
	// A failed write is reported instead of being logged as "Cleared".
	fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
		if (err) {
			console.error('Failed to clear posts.csv:', err);
			return;
		}
		console.log('Cleared posts.csv');
	});
	fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
		if (err) {
			console.error('Failed to clear users.csv:', err);
			return;
		}
		console.log('Cleared users.csv');
	});

	// Declared explicitly: a bare assignment creates an implicit global in sloppy
	// mode and throws a ReferenceError in strict mode / ES modules.
	var subreddits = ["all"];
	for (var i=0;i<subreddits.length; i++) {

MattSandy / run.R

Last active December 3, 2016 20:27

Update and Append Multiple Files in R

	# All workbook paths below are relative to this directory.
	setwd("~/R/Merge Stuff")
	# Install openxlsx only when it is missing, so re-running the script does not
	# download and reinstall the package every time.
	if (!requireNamespace("openxlsx", quietly = TRUE)) {
	  install.packages("openxlsx")
	}
	library("openxlsx")

	# Named list holding each workbook that takes part in the merge.
	file <- list()
	#base file is the original you are working from
	#update file is the file with new information which updates base cells
	#error file contains information in new columns which are appended
	# NOTE(review): no error file is loaded in the lines shown here - confirm it is read further down.
	file$base <- read.xlsx("base.xlsx")
	file$update <- read.xlsx("update.xlsx")

MattSandy / functions_html_table.R

Last active April 7, 2017 00:31

Create html table from dataframe

	html.data.frame <- function(table,id="records") {
	df <- data.frame(table)
	# Coerce every column to character and escape the HTML-special characters so
	# that cell values cannot break the generated table markup.
	# seq_len() yields an empty sequence for a zero-column frame, whereas
	# 1:ncol(df) would iterate over c(1, 0) and fail on the missing column.
	for(i in seq_len(ncol(df))) {
	  df[,i] <- as.character(df[,i])
	  # "&" is escaped first; otherwise the ampersands introduced by the "<" and
	  # ">" replacements below would themselves be escaped a second time.
	  df[,i] <- gsub("&", "&amp;", df[,i], fixed = TRUE)
	  df[,i] <- gsub("<", "&lt;", df[,i], fixed = TRUE)
	  df[,i] <- gsub(">", "&gt;", df[,i], fixed = TRUE)
	}
	html <- paste0('<table id="',id,'">')
	html <- paste0(html,"<thead><tr><td>",paste(names(df),collapse = "</td><td>"),"</td></tr></thead><tbody>")

MattSandy / termination_words.R

Last active April 7, 2017 05:26

Returns an upper-cased vector containing the last word before each period in the text.

	# Split `text` on literal periods, keep the last space-delimited token of
	# each piece, and upper-case the resulting character vector.
	termination_words <- toupper(as.character(sapply(
	  unlist(strsplit(text, '\\.')),
	  function(sentence) {
	    tokens <- unlist(strsplit(sentence, ' '))
	    tokens[length(tokens)]
	  }
	)))

MattSandy / app.js

Last active July 19, 2017 02:43

illinoisreportcard.com scraper

	let http = require('http');
	let https = require('https');
	let fs = require('fs');
	let cheerio = require('cheerio')
	// Letters A-Z; each one is handed to scrape() below.
	const letters = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'];

	// Start report.csv with only its header row (an existing file is replaced).
	// A failed write is reported instead of being logged as 'done'.
	fs.writeFile('report.csv', "Name,Grades,County,City,Link\r\n", (err) => {
		if (err) {
			console.error('Failed to write report.csv header:', err);
			return;
		}
		console.log('done');
	});

	// NOTE(review): the header write is asynchronous and the scrapes start
	// immediately; if scrape() appends to report.csv, confirm the ordering is safe.
	for (const letter of letters) {
		scrape(letter);
	}

Matt Sandy MattSandy