This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set up the system environment for R and MALLET (Windows paths).
MALLET_HOME <- "c:/mallet-2.0.7" # location of the bin directory
Sys.setenv(MALLET_HOME = MALLET_HOME)

# Make the Java runtime reachable through PATH. The directory is prepended
# instead of replacing PATH outright, so everything that was already on PATH
# remains available to system() calls.
java_bin <- "c:/Program Files (x86)/Java/jre7/bin"
path_entries <- strsplit(
  Sys.getenv("PATH"), .Platform$path.sep,
  fixed = TRUE
)[[1]]
if (!(java_bin %in% path_entries)) {
  Sys.setenv(
    PATH = paste(c(java_bin, path_entries), collapse = .Platform$path.sep)
  )
}

# Configure variables and filenames for MALLET
## here using MALLET's built-in example data
# Set the list of topic numbers to iterate over: 2, 3, ..., 100.
# NOTE(review): this variable shares its name with base::seq(); calls such as
# seq(...) still find the function, but a different name would be clearer.
seq <- seq(2, 100, 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R interface with MALLET to loop over different numbers of topics
# on a Linux machine
# first, download MALLET
# second, install Java
# configure variables and filenames for MALLET
## here using MALLET's built-in example data
# set list of topic numbers to iterate over
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a Google spreadsheet (published as CSV) into a data frame.
# More detail: http://blog.revolutionanalytics.com/2009/09/how-to-use-a-google-spreadsheet-as-data-in-r.html and
# http://exploredata.wordpress.com/2012/08/20/importing-a-google-spreadsheet-into-r/
googsheet <- "full URL of google doc here, must end with &output=csv"
library(RCurl)
# Verify the server certificate against the CA bundle shipped with RCurl.
# `cainfo` names a bundle file; `capath` would expect a directory of
# certificates, and cacert.pem is a single file.
options(RCurlOptions = list(
  cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"),
  ssl.verifypeer = TRUE
))
# Download the sheet as one CSV string.
myCsv <- getURL(googsheet)
# Parse straight from the string; `text =` lets read.csv() open and close its
# own connection, so no textConnection is left open.
data <- read.csv(text = myCsv, stringsAsFactors = FALSE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf)
# Average the `temp` column of the `beaver2` data frame for each `day`.
sqldf("SELECT
         day
       , avg(temp) as avg_temp
       FROM beaver2
       GROUP BY
         day;")
# day avg_temp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Methods for doing Excel's VLOOKUP with R
# sample data
x <- data.frame(id = c(1, 2, 3, 4), name = c("foo", "bar", "bob", "joe"))
y <- data.frame(idblah = c(5, 2, 4, 3, 1), sex = c("m", "f", "f", "m", "m"))
z <- data.frame(id = c(1, 2, 3, 4, 5), sex = c("g", "b", "b", "g", "g"))

# Find a single value: return the `col` entry of the first row of `df` whose
# first-column value equals `val`.
#   val: the key to look for in the first column of `df`
#   df:  a data frame whose first column holds the lookup keys
#   col: name or index of the column to take the result from
# Returns NA when no row matches.
vlookup <- function(val, df, col) {
  # which() discards comparisons that evaluate to NA, so a missing key in the
  # first column cannot turn into an all-NA row ahead of the real match.
  hits <- which(df[[1]] == val)
  df[hits, col][1]
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# three correlation methods
# Both variables come from the built-in `faithful` data frame.
duration <- faithful$eruptions # the eruption durations
waiting <- faithful$waiting # the waiting period
# Scatterplot of the two variables.
plot(duration, waiting)
# Correlation coefficient (cor() uses the Pearson method unless told otherwise).
cor(duration, waiting)
# Test of association between the two variables.
cor.test(duration, waiting)
# distance correlation statistic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get data
# NOTE(review): setwd() changes the working directory for the rest of the
# session; it is kept because the file names collected below are relative.
setwd("C:/Downloads/html") # this folder has only the HTML files
# List the folder's files before the helper script is written into it.
html <- list.files()
# load packages
library(tm)
library(RCurl)
library(XML)
# get some code from github to convert HTML to text
# The download is saved as htmlToText.R in the working directory. The server
# certificate is verified against the CA bundle shipped with RCurl, since the
# downloaded text is R code. Redirects are followed in case the host answers
# with one (getURL() does not follow them unless asked to).
writeChar(
  getURL(
    "https://raw.github.com/tonybreyal/Blog-Reference-Functions/master/R/htmlToText/htmlToText.R",
    followlocation = TRUE,
    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"),
    ssl.verifypeer = TRUE
  ),
  con = "htmlToText.R"
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install whichever of the listed packages are not installed yet, then attach
# all of them.
list.of.packages <- c("xx", "yy") # replace xx and yy with package names
# Names from the list that are absent from the installed-package table.
new.packages <- list.of.packages[
  !(list.of.packages %in% installed.packages()[, "Package"])
]
if (length(new.packages) > 0) install.packages(new.packages)
# require() returns TRUE/FALSE per package instead of stopping, so the printed
# list shows which packages could be attached.
lapply(list.of.packages, require, character.only = TRUE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://rpsychologist.com/how-to-work-with-google-ngram-data-sets-in-r-using-mysql/
# get ngram data (files a-z) from
# http://books.google.com/ngrams/datasets
# get the a-z files into one big CSV file, use cmd in folder containing all the csv files
# http://www.solveyourtech.com/merge-csv-files/
# copy *.csv all-ngrams.csv
# get MySQL, install, install client libraries, fuss about to make a new database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Principal components of every iris column except Species, with the
# variables scaled before the decomposition.
pc <- prcomp(~ . - Species, data = iris, scale. = TRUE)
library(rgl)
# Open an empty 3D plot (type = "n" draws no points) spanning the first three
# component scores.
plot3d(
  pc$x[, 1:3],
  xlab = "Component 1", ylab = "Component 2", zlab = "Component 3",
  type = "n", box = FALSE, axes = TRUE
)
decorate3d(
  xlab = "x", ylab = "y", zlab = "z",
  box = TRUE, axes = TRUE, main = NULL, sub = NULL,
  top = TRUE, aspect = FALSE, expand = 1.03
)
# One sphere per observation, coloured by species. Indexing the palette by the
# species code gives the same colours as repeating each colour 50 times for
# iris, without relying on the rows being in three blocks of 50.
spheres3d(
  pc$x[, 1:3],
  radius = 0.1,
  col = c("red", "green", "black")[as.integer(iris$Species)]
)
grid3d(c("x", "y+", "z"))
# Label every point with its row name.
text3d(pc$x[, 1:3], texts = rownames(pc$x), adj = 1.3)