Created
December 2, 2014 17:25
-
-
Save jaehyeon-kim/f3a80290746b325a6e8e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(knitr) | |
library(lubridate) | |
library(stringr) | |
library(reshape2) | |
library(plyr) | |
# create data folder | |
dataDir <- paste0("data","_",format(Sys.Date(),"%Y-%m-%d")) | |
if(file.exists(dataDir)) { | |
unlink(dataDir, recursive = TRUE) | |
dir.create(dataDir) | |
} else { | |
dir.create(dataDir) | |
} | |
# assumes codes are known beforehand | |
codes <- c("MSFT", "TCHC") | |
urls <- paste0("http://www.google.com/finance/historical?q=NASDAQ:", | |
codes,"&output=csv") | |
paths <- paste0(dataDir,"/",codes,".csv") # backward slash on windows (\) | |
# simple error handling in case file doesn't exists | |
downloadFile <- function(url, path, ...) { | |
# remove file if exists already | |
if(file.exists(path)) file.remove(path) | |
# download file | |
tryCatch( | |
download.file(url, path, ...), error = function(c) { | |
# remove file if error | |
if(file.exists(path)) file.remove(path) | |
# create error message | |
c$message <- paste(substr(path, 1, 4),"failed") | |
message(c$message) | |
} | |
) | |
} | |
# wrapper of mapply | |
Map(downloadFile, urls, paths) | |
# read all csv files and merge | |
files <- dir(dataDir, full.name = TRUE) | |
dataList <- llply(files, function(file){ | |
# get code from file path | |
pattern <- "/[A-Z][A-Z][A-Z][A-Z]" | |
code <- substr(str_extract(file, pattern), 2, nchar(str_extract(file, pattern))) | |
tryCatch({ | |
data <- read.csv(file, stringsAsFactors = FALSE) | |
# first column's name is funny | |
names(data) <- c("Date","Open","High","Low","Close","Volume") | |
data$Date <- dmy(data$Date) | |
data$Close <- as.numeric(data$Close) | |
data$Code <- code | |
# optional | |
data$Open <- as.numeric(data$Open) | |
data$High <- as.numeric(data$High) | |
data$Low <- as.numeric(data$Low) | |
data$Volume <- as.integer(data$Volume) | |
# select only 'Date', 'Close' and 'Code' | |
# raw data should be arranged in an ascending order | |
arrange(subset(data, select = c(Date, Close, Code)), Date) | |
}, | |
error = function(c){ | |
c$message <- paste(code,"failed") | |
message(c$message) | |
# return a dummy data frame not to break function | |
data <- data.frame(Date=dmy(format(Sys.Date(),"%d%m%Y")), Close=0, Code="NA") | |
data | |
}) | |
}, .progress = "text") | |
# data is combined to create a long format | |
# dummy data frame values are filtered out | |
data <- ldply(.data = dataList, .fun = rbind) | |
data <- subset(data, Code != "NA") | |
# data is converted into a wide format | |
data <- dcast(data, Date ~ Code, value.var="Close") | |
# Date is filtered out | |
data <- subset(data, select = c(-Date)) | |
# apply log difference column wise | |
dailyRet <- apply(log(data), 2, diff, lag=1) | |
# obtain daily return, variance and correlation | |
returns <- apply(dailyRet, 2, sum, na.rm = TRUE) | |
std <- apply(dailyRet, 2, sd, na.rm = TRUE) | |
correlation <- cor(dailyRet) | |
returns | |
std | |
correlation | |
# delete data folder | |
if(file.exists(dataDir)) { unlink(dataDir, recursive = TRUE) } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment