Skip to content

Instantly share code, notes, and snippets.

View MattSandy's full-sized avatar
💭
Hungry

Matt Sandy MattSandy

💭
Hungry
View GitHub Profile
@MattSandy
MattSandy / app.js
Created June 2, 2016 18:46
Reddit Scraper
var http = require('http');
var https = require('https');
var fs = require('fs');
// Accumulators for scraped posts and users (presumably filled by code outside this excerpt).
var post_array = [];
var user_array = [];

// Reset both CSV outputs so that they contain only their header row.
// The callbacks now check the error argument: previously 'done' was logged
// even when the file could not be written.
fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
    if (err) {
        console.error('Could not write posts.csv: ' + err.message);
        return;
    }
    console.log('done');
});
fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
    if (err) {
        console.error('Could not write users.csv: ' + err.message);
        return;
    }
    console.log('done');
});
var subreddits = ["AskReddit", "politics", "The_Donald", "funny", "nba", "Overwatch", "gaming", "pics", "news", "todayilearned", "videos", "movies", "worldnews", "soccer", "relationships", "WTF", "SandersForPresident", "AdviceAnimals", "gifs", "leagueoflegends", "aww", "BlackPeopleTwitter", "hockey", "SquaredCircle", "gameofthrones", "nfl", "GlobalOffensive", "h3h3productions", "arrow", "DotA2", "pcmasterrace", "hiphopheads", "me_irl", "Showerthoughts", "science", "Mr_Trump", "Games", "mildlyinteresting", "asoiaf", "technology", "hillaryclinton", "IAmA", "4chan", "hearthstone", "TumblrInAction", "magicTCG", "Jokes", "PoliticalDiscussion", "tifu", "baseball", "CFB", "enoughsanderss
@MattSandy
MattSandy / run.R
Last active June 20, 2016 21:19
Find the Minimum Distance Between Two Points, and Their Coordinates
# Reproducible sample: ten random points in the plane.
set.seed(1)
df <- data.frame(x = rnorm(10), y = rnorm(10))

# Pairwise distances between all rows (dist() defaults to Euclidean).
d1 <- dist(df)

# Smallest pairwise distance.
min(d1)
#0.2036045

# Position of that smallest distance inside d1.
which.min(d1)
#43

# combn() enumerates the row-name pairs in the same order in which dist()
# stores its values, so the column at the minimum's position names the two
# closest rows; indexing df with it prints their coordinates.
df[combn(row.names(df), 2)[, which.min(d1)], ]
# x y
@MattSandy
MattSandy / run.R
Last active December 12, 2018 23:03
Use R and Tableau to Analyze Text from Presidential Debate
# Source of the transcript (Washington Post, first Trump-Clinton presidential debate):
#https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/
# Read the headerless two-column CSV; strings are kept as character, not factor.
transcript <- read.csv(file="~/R/reddit/speach/import/transcript.csv",header = FALSE, stringsAsFactors = FALSE)
# Column 1 is named "candidate", column 2 "statement".
colnames(transcript) <- c("candidate","statement")
# Empty 0 x 2 character matrix; one (candidate, word) row is appended per word below.
transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
# NOTE(review): 1:nrow(transcript) iterates over c(1, 0) when the file has no rows;
# seq_len(nrow(transcript)) would skip the loop instead.
for(i in 1:nrow(transcript)) {
#deletes every character that is not alphanumeric, a space or an apostrophe,
#then splits the statement on single spaces into a vector of words
words <- unlist(strsplit(gsub("[^[:alnum:] \']", "", transcript[i,"statement"]), " "))
for(word in words) {
# Append one row per word; rbind() builds a new matrix on every call, so this
# becomes slow for long transcripts.
transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
}
@MattSandy
MattSandy / run.R
Last active October 16, 2016 19:37
Second Trump vs. Clinton Debate
# Source of the transcript (Washington Post, second Trump vs. Clinton debate):
#https://www.washingtonpost.com/news/the-fix/wp/2016/10/09/everything-that-was-said-at-the-second-donald-trump-vs-hillary-clinton-debate-highlighted/
# Read the headerless two-column CSV; strings are kept as character, not factor.
transcript <- read.csv(file="~/R/reddit/speach/import/transcript-2nd-debate.csv",header = FALSE, stringsAsFactors = FALSE)
# Column 1 is named "candidate", column 2 "statement".
colnames(transcript) <- c("candidate","statement")
# Empty 0 x 2 character matrix; one (candidate, word) row is appended per word below.
transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
# NOTE(review): 1:nrow(transcript) iterates over c(1, 0) when the file has no rows;
# seq_len(nrow(transcript)) would skip the loop instead.
for(i in 1:nrow(transcript)) {
#deletes every character that is not alphanumeric, a space or an apostrophe,
#then splits the statement on single spaces into a vector of words
words <- unlist(strsplit(gsub("[^[:alnum:] \']", "", transcript[i,"statement"]), " "))
for(word in words) {
# Append one row per word; rbind() builds a new matrix on every call, so this
# becomes slow for long transcripts.
transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
}
@MattSandy
MattSandy / run.R
Last active October 21, 2016 22:18
Final Presidential Trump vs Clinton Debate
# Source of the transcript (Washington Post, final Trump-Clinton debate):
#https://www.washingtonpost.com/news/the-fix/wp/2016/10/19/the-final-trump-clinton-debate-transcript-annotated/
# Read the headerless two-column CSV; strings are kept as character, not factor.
transcript <- read.csv(file="~/R/reddit/speach/import/transcript-3rd-debate.csv",header = FALSE, stringsAsFactors = FALSE)
# Column 1 is named "candidate", column 2 "statement".
colnames(transcript) <- c("candidate","statement")
# Empty 0 x 2 character matrix; one (candidate, word) row is appended per word below.
transcript_melted <- matrix(data="NA",nrow = 0,ncol = 2)
# NOTE(review): 1:nrow(transcript) iterates over c(1, 0) when the file has no rows;
# seq_len(nrow(transcript)) would skip the loop instead.
for(i in 1:nrow(transcript)) {
#deletes every character that is not alphanumeric or a space, then splits the
#statement on single spaces into a vector of words
#NOTE(review): apostrophes are deleted as well, so contractions such as "don't"
#become "dont" - confirm this is intended
words <- unlist(strsplit(gsub("[^[:alnum:] ]", "", transcript[i,"statement"]), " "))
for(word in words) {
# Append one row per word; rbind() builds a new matrix on every call, so this
# becomes slow for long transcripts.
transcript_melted <- rbind(transcript_melted, c(transcript[i,"candidate"],word))
}
@MattSandy
MattSandy / app.js
Created October 27, 2016 16:43
Reddit Front Page Monitor
var http = require('http');
var https = require('https');
var fs = require('fs');
// Accumulators for scraped posts and users (presumably filled by code outside this excerpt).
var post_array = [];
var user_array = [];

// Reset both CSV outputs so that they contain only their header row.
// The callbacks now check the error argument: previously the "Cleared ..."
// message was logged even when the file could not be written.
fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
    if (err) {
        console.error('Could not clear posts.csv: ' + err.message);
        return;
    }
    console.log('Cleared posts.csv');
});
fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
    if (err) {
        console.error('Could not clear users.csv: ' + err.message);
        return;
    }
    console.log('Cleared users.csv');
});

// Subreddit names to iterate over. Declared explicitly: the bare assignment
// created an implicit global, which is a ReferenceError in strict mode.
var subreddits = ["all"];
for (var i=0;i<subreddits.length; i++) {
@MattSandy
MattSandy / run.R
Last active December 3, 2016 20:27
Update and Append Multiple Files in R
# Work from the directory that holds the workbooks read below.
setwd("~/R/Merge Stuff")
# Install openxlsx only when it is missing; the unconditional install.packages()
# call re-downloaded the package on every run.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library("openxlsx")
# Named list that holds one data frame per workbook.
# NOTE(review): the variable name masks base::file(); it is kept because code
# outside this excerpt may refer to it.
file <- list()
#base file is the original you are working from
#update file is the file with new information which updates base cells
#error file contains information in new columns which are appended
file$base <- read.xlsx("base.xlsx")
file$update <- read.xlsx("update.xlsx")
@MattSandy
MattSandy / functions_html_table.R
Last active April 7, 2017 00:31
Create html table from dataframe
html.data.frame <- function(table,id="records") {
df <- data.frame(table)
# Convert every column to text and escape the HTML-special characters.
# seq_len() turns the loop into a no-op for a table without columns, where
# 1:ncol(df) would iterate over c(1, 0) and fail on the missing column.
for(i in seq_len(ncol(df))) {
df[,i] <- as.character(df[,i])
# "&" has to be escaped first so the entities inserted below are not escaped again.
df[,i] <- gsub("&", "&amp;", df[,i])
df[,i] <- gsub("<", "&lt;", df[,i])
# Fixed: the entity used to be written as "&gt", without its closing semicolon.
df[,i] <- gsub(">", "&gt;", df[,i])
}
html <- paste0('<table id="',id,'">')
html <- paste0(html,"<thead><tr><td>",paste(names(df),collapse = "</td><td>"),"</td></tr></thead><tbody>")
@MattSandy
MattSandy / termination_words.R
Last active April 7, 2017 05:26
Returns the vector of each word found before the end of a sentence.
# Upper-cased last word of every sentence in `text` (a character vector defined
# outside this excerpt). Sentences are delimited by "." only.
#
# Fragments that contain no words at all (e.g. the pieces between the dots of
# "...") are dropped. Previously they made sapply() return a list, and
# as.character() then turned each empty fragment into the literal string
# "character(0)", which ended up in the result as "CHARACTER(0)".
termination_words <- local({
  # as.character() keeps the empty-input case (unlist() yields NULL) working.
  sentences <- as.character(unlist(strsplit(text, '\\.')))
  words_per_sentence <- strsplit(sentences, ' ')
  words_per_sentence <- words_per_sentence[lengths(words_per_sentence) > 0]
  toupper(vapply(words_per_sentence, function(words) {
    words[length(words)]
  }, character(1)))
})
@MattSandy
MattSandy / app.js
Last active July 19, 2017 02:43
illinoisreportcard.com scraper
let http = require('http');
let https = require('https');
let fs = require('fs');
let cheerio = require('cheerio')
// The 26 upper-case letters 'A'..'Z'; one scrape pass is started per letter.
const letters = Array.from({ length: 26 }, (_, offset) => String.fromCharCode(65 + offset));

// Reset report.csv so that it contains only the CSV header row.
// The callback now checks the error argument: previously 'done' was logged
// even when the file could not be written.
// NOTE(review): the write is asynchronous, so the scrape() calls below start
// before the header is guaranteed to be on disk. If scrape() appends to
// report.csv (not visible in this excerpt), rows could race the header.
fs.writeFile('report.csv', "Name,Grades,County,City,Link\r\n", (err) => {
    if (err) {
        console.error(`Could not write report.csv: ${err.message}`);
        return;
    }
    console.log('done');
});

for (const letter of letters) {
    scrape(letter);
}