Skip to content

Instantly share code, notes, and snippets.

# performing set difference between two vectors
# Both id vectors are sorted first (sort() drops NA values by default).
# setdiff(x, y) keeps the distinct elements of x that do not occur in y, so
# order_diff holds the ids found in orders_table_b but not in orders_table_a.
order_grp_a <- sort(orders_table_a$orders_id)
order_grp_b <- sort(orders_table_b$orders_id)
order_diff <- setdiff(order_grp_b, order_grp_a)
# load up libraries
library(ggplot2)
library(plyr)
library(XLConnect)
# try and read worksheet directly
# Loads the workbook with XLConnect and reads its first sheet (sheet=1).
original_data <- XLConnect::readWorksheet(loadWorkbook("/home/path/to/given_data.xlsx"), sheet=1)
# The bare string below is pasted output, presumably the error raised by the
# call above.
# NOTE(review): XLConnect runs on a JVM; raising the Java heap size before the
# package is loaded is the usual remedy for this error - confirm against the
# XLConnect documentation.
"
Error: OutOfMemoryError (Java): GC overhead limit exceeded
"
# Django templating style variable interpolation via https://github.com/jokergoo/GetoptLong
library(GetoptLong)
country <- "SG"
# Template text; the {{country}} placeholder is filled in by qq() below.
my_query <- "SELECT * FROM my_table WHERE country='{{country}}';"
# code.pattern is a regular expression in which CODE stands for the variable
# name, so this pattern makes qq() substitute {{name}} markers.
# NOTE(review): the value is pasted into the SQL text as-is; no escaping is
# visible here, so only use trusted values - confirm before reusing.
my_query <- qq(my_query, code.pattern="\\{\\{CODE\\}\\}")
cat(my_query)
"
> cat(my_query)
# Plotting a v-line on date
# Index of the row(s) whose Date equals 2013-11-20.
# NOTE(review): assumes df$Date is a Date (or comparable) column - confirm.
date_idx <- which(df$Date=="2013-11-20")
p <- ggplot(df, aes(Date, Value))
# Line chart of Value over Date plus a vertical line at the matched date(s);
# the date is converted with as.numeric() before being used as xintercept.
p + geom_line() + geom_vline(xintercept=as.numeric(df$Date[date_idx]))
# load sample data (with duplicates reported earlier)
basket <- read.csv("/home/kenny/some_data_with_duplicates.csv", header=TRUE)
# remove duplicates and sort by column
# unique() on a data frame drops rows that exactly repeat an earlier row.
basket <- unique(basket)
# with() evaluates order(basket_id) inside basket, so the rows end up in
# ascending basket_id order.
basket <- basket[with(basket, order(basket_id)), ] # sort by ident
# Print the contents of basketApriori sorted by "confidence".
# NOTE(review): basketApriori is not defined in the visible code; presumably a
# set of association rules mined from the basket data - confirm.
inspect(sort(basketApriori, by="confidence"))
# The bare string below is pasted console output, presumably from the call above.
"
lhs rhs support confidence lift
1 {} => {Apparel and Accessories} 0.4846383 0.4846383 1
2 {} => {Shoes} 0.3488603 0.3488603 1
3 {} => {Accessories} 0.1219029 0.1219029 1
"
# Lowering the support/confidence thresholds to negligible levels
# Reload data from list via Quandl
watchSymbols <- c(
"GOOG/NYSE_TWTR",
"GOOG/NASDAQ_FB"
)
watchNames <- c(
"twitter",
"facebook"
# reverse the order of the values within every column of a data frame
#' Reverse the values of every column in a data frame
#'
#' Each column is reversed independently with `rev()`, so the values of the
#' last row end up in the first row. Column names and column order are left
#' untouched; row names are not modified.
#'
#' @param df A data frame.
#' @return `df` with the values of each column in reverse order. A data
#'   frame without columns is returned unchanged.
reverse_df <- function(df) {
  # seq_along() gives an empty sequence for a zero-column data frame, whereas
  # 1:length(...) would iterate over c(1, 0) and fail on df[[1]].
  for (col_idx in seq_along(df)) {
    df[[col_idx]] <- rev(df[[col_idx]])
  }
  df
}
# Fetch the dataset with code "YAHOO/INDEX_VIX" through Quandl().
vix <- Quandl("YAHOO/INDEX_VIX")
# Plot and summarise the Close column.
# NOTE(review): boxPlot() and describe() are not base R and no library() call
# for them is visible here - confirm which packages provide them.
boxPlot(vix$Close)
describe(vix$Close)
"
vix$Close
n missing unique Mean .05 .10 .25 .50 .75 .90 .95
6031 0 2280 20.21 11.54 12.25 14.49 18.53 23.69 29.73 34.63
lowest : 9.31 9.48 9.70 9.82 9.89, highest: 72.67 74.26 79.13 80.06 80.86
library("RPostgreSQL")
# Open a connection to the remote PostgreSQL database through the DBI driver.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname="remote_db", user="kenny", port="5432", host="remote_db_host")
# create an environment container
myData <- new.env()
# fetch data
# dbSendQuery() only submits the statement and returns a result handle; the
# rows still have to be retrieved from `cur`.
# NOTE(review): no fetch, dbClearResult() or dbDisconnect() call is visible in
# this excerpt - confirm the result set and connection are released later.
cur <- dbSendQuery(con, "select * from some_table order by date desc limit 1000;")