Created
April 16, 2016 20:17
-
-
Save StuartGordonReid/02ecb544dd23c4147da5093cc84f39c7 to your computer and use it in GitHub Desktop.
Compression Test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
compressionTest <- function(code, years = 7, algo = "g") { | |
# The generic Quandl API key for TuringFinance. | |
Quandl.api_key("t6Rn1d5N1W6Qt4jJq_zC") | |
# Download the raw price data. | |
data <- Quandl(code, rows = -1, type = "xts") | |
# Extract the variable we are interested in. | |
ix.ac <- which(colnames(data) == "Adjusted Close") | |
if (length(ix.ac) == 0) | |
ix.ac <- which(colnames(data) == "Close") | |
ix.rate <- which(colnames(data) == "Rate") | |
closes <- data[ ,max(ix.ac, ix.rate)] | |
# Get the month endpoints. | |
monthends <- endpoints(closes) | |
monthends <- monthends[2:length(monthends) - 1] | |
# Observed compression ratios. | |
cratios <- c() | |
for (t in ((12 * years) + 1):length(monthends)) { | |
# Extract a window of length equal to years. | |
window <- closes[monthends[t - (12 * years)]:monthends[t]] | |
# Compute detrended log returns. | |
returns <- Return.calculate(window, method = "log") | |
returns <- na.omit(returns) - mean(returns, na.rm = T) | |
# Binarize the returns. | |
returns[returns < 0] <- 0 | |
returns[returns > 0] <- 1 | |
# Convert into raw hexadecimal. | |
hexrets <- bin2rawhex(returns) | |
# Compute the compression ratio | |
cratios <- c(cratios, length(memCompress(hexrets)) / | |
length(hexrets)) | |
} | |
# Expected compression ratios. | |
ecratios <- c() | |
for (i in 1:length(cratios)) { | |
# Generate some benchmark returns. | |
returns <- rnorm(252 * years) | |
# Binarize the returns. | |
returns[returns < 0] <- 0 | |
returns[returns > 0] <- 1 | |
# Convert into raw hexadecimal. | |
hexrets <- bin2rawhex(returns) | |
# Compute the compression ratio | |
ecratios <- c(ecratios, length(memCompress(hexrets)) / | |
length(hexrets)) | |
} | |
if (mean(cratios) >= min(1.0, mean(ecratios))) { | |
print(paste("Dataset:", code, "is not compressible { c =", | |
mean(cratios), "} --> efficient.")) | |
} else { | |
print(paste("Dataset:", code, "is compressible { c =", | |
mean(cratios), "} --> inefficient.")) | |
} | |
} | |
bin2rawhex <- function(bindata) { | |
bindata <- as.numeric(as.vector(bindata)) | |
lbindata <- split(bindata, ceiling(seq_along(bindata)/4)) | |
hexdata <- as.vector(unlist(mclapply(lbindata, bin2hex))) | |
hexdata <- paste(hexdata, sep = "", collapse = "") | |
hexdata <- substring(hexdata, | |
seq(1, nchar(hexdata), 2), | |
seq(2, nchar(hexdata), 2)) | |
return(as.raw(as.hexmode(hexdata))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment