This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> #Inspect most popular words, minimum frequency of 20 | |
> findFreqTerms(dtm, lowfreq=20) | |
[1] "15" "2008" "2009" "2011" "a" "ad" "add" "adsens" | |
[9] "air" "analyt" "and" "appl" "at" "back" "bezel" "black" | |
[17] "book" "bookmark" "break" "broke" "broken" "bubbl" "by" "can" | |
[25] "case" "chang" "child" "code" "comment" "comput" "cost" "cover" | |
[33] "crack" "css" "custom" "data" "delet" "disabl" "display" "do" | |
[41] "doe" "drop" "edit" "eleven" "em209" "entri" "fix" "footer" | |
[49] "footerphp" "for" "free" "from" "get" "glue" "googl" "hadoop" | |
[57] "header" "hing" "how" "i |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cluster the keyword document-term matrix with k-means.
# k = 5 because the author expects 5 main topics:
# Data Science, Web Analytics, R, Julia, Wordpress.
kmeans5 <- kmeans(dtm, 5)

# Merge cluster assignment back to keywords.
# The data frame is built directly rather than via as.data.frame(cbind(...)):
# cbind() on a character vector and an integer vector yields a character
# matrix, which would silently turn the cluster ids into strings.
kw_with_cluster <- data.frame(
  keyword = searchkeywords[["Natural Search Keyword"]],
  kmeans5 = kmeans5$cluster,
  stringsAsFactors = FALSE
)

# Make a data frame for each cluster result to quickly "eyeball" the results
cluster1 <- subset(kw_with_cluster, subset = kmeans5 == 1)
cluster2 <- subset(kw_with_cluster, subset = kmeans5 == 2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Capitalize reserved keywords, indent between keywords and for multi-line statements | |
#Tables on separate lines | |
#Untested, hopefully this is valid SQL :) | |
SELECT | |
a.key, | |
a.col2, | |
a.col3, | |
COALESCE(a.col4,0) AS col4, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#accumulator for cost results | |
cost_df <- data.frame() | |
#run kmeans for all clusters up to 100 | |
for(i in 1:100){ | |
#Run kmeans for each level of i, allowing up to 100 iterations for convergence | |
kmeans<- kmeans(x=dtm, centers=i, iter.max=100) | |
#Combine cluster number and cost together, write to df | |
cost_df<- rbind(cost_df, cbind(i, kmeans$tot.withinss)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculate lm's for emphasis: one straight-line fit of cost against cluster
# count for each of three row ranges of cost_df (1-10, 10-19, 20-100).
# The fits are kept in variables so the fitted values below come from the
# models themselves instead of coefficients copied by hand from printed
# output; k-means is randomly initialised, so hand-copied numbers go stale on
# every re-run.
# The surrounding parentheses make each assignment print its result at top
# level, as the bare lm() calls did.
# NOTE(review): assumes row i of cost_df holds the result for i clusters, so
# the row ranges line up with the cluster thresholds used below -- confirm.
(fit_low <- lm(cost ~ cluster, data = cost_df[1:10, ]))
(fit_mid <- lm(cost ~ cluster, data = cost_df[10:19, ]))
(fit_high <- lm(cost ~ cluster, data = cost_df[20:100, ]))

# Piecewise fitted cost: choose the model by cluster count
# (< 10, < 20, otherwise).
cost_df$fitted <- ifelse(
  cost_df$cluster < 10,
  predict(fit_low, newdata = cost_df),
  ifelse(
    cost_df$cluster < 20,
    predict(fit_mid, newdata = cost_df),
    predict(fit_high, newdata = cost_df)
  )
)
#Cost plot |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unzip the data feed archive into a working directory
unzip(
  zipfile = "/Volumes/32SDCARD/Data Sciences - General/SampleDailyDataFeed.zip",
  exdir = "~/Desktop/datafeed"
)

# Read in hit data. header = FALSE: the first line is treated as data, so the
# columns get default names until they are assigned from column_headers.
hit_data <- read.delim(
  "~/Desktop/datafeed/hit_data.tsv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Read in the header file; its own column names are used to name hit_data
column_headers <- read.delim("~/Desktop/datafeed/column_headers.tsv")
#Set column headers for hit data to the column headers for 'column headers' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("RPostgreSQL")

# Connect to Redshift through the PostgreSQL driver.
# Every X-string below is a placeholder and must be replaced before running;
# `port` in particular is a bare symbol here and needs a numeric value.
redshift.Connection <- dbConnect(
  dbDriver("PostgreSQL"),
  host = "XXXXXXXXXXXXXXX.redshift.amazonaws.com",
  dbname = "XXXXXXXX",
  user = "XXXXXXX",
  password = "XXXXXXXXXX",
  port = XXXXXXXX
)

# Example query text; the literal spans three lines and keeps its newlines
query_string <-
"select *
from table
where something = 'True';"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the Redshift R library:
# https://github.com/pingles/redshift-r
# install.packages("~/Downloads/redshift-r-master", dependencies = TRUE, repos = NULL, type = "source")
library(redshift)

# Open a JDBC connection; REDSHIFT_DB, DB_NAME, LOGIN and PASSWORD are
# placeholders to be replaced with real connection details.
redshift <- redshift.connect(
  "jdbc:postgresql://REDSHIFT_DB:5439/DB_NAME",
  "LOGIN",
  "PASSWORD"
)

# Example Query:
data <- dbGetQuery(redshift, "SELECT COUNT(*) FROM table")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Start IPython, generate SHA1 password to use for IPython Notebook server | |
$ ipython | |
Python 2.7.5 |Anaconda 1.8.0 (x86_64)| (default, Oct 24 2013, 07:02:20) | |
Type "copyright", "credits" or "license" for more information. | |
IPython 1.1.0 -- An enhanced Interactive Python. | |
? -> Introduction and overview of IPython's features. | |
%quickref -> Quick reference. | |
help -> Python's own help system. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("RSiteCatalyst")
library("WriteXLS")

# Validate that underlying Perl modules for WriteXLS are installed correctly.
# Will return "Perl found. All required Perl modules were found" if installed
# correctly.
testPerl()
#### 1. Pull data for all report suites to create one comprehensive report #### | |
#Authenticate with Adobe Analytics API |