Created
February 8, 2016 18:08
-
-
Save mmparker/8aca803eae5410875a21 to your computer and use it in GitHub Desktop.
Snippet for lagging within groups (or applying any other transformFunc) using RevoScaleR in Revolution R Enterprise (RRE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: applying the lagging transform function from this StackOverflow
# answer, http://stackoverflow.com/a/30874772/143319, to grouped data.
# In short:
#   1. Use rxSplit() to put each group in its own XDF file
#   2. Use lapply() to iterate over the list of XDF files

# Pick a sample dataset: the DJIAdaily.xdf file found in the directory that
# RevoScaleR reports as its "sampleDataDir" option
xdfPath <- file.path(rxGetOption("sampleDataDir"), "DJIAdaily.xdf")

# Take a quick look at the dataset (variable info plus the first 10 rows)
rxGetInfo(xdfPath, getVarInfo = TRUE, numRows = 10)
# Set up a temporary directory for the files
demoDir <- tempdir()

# Split the dataset by the DayOfWeek factor; the output files are written
# under demoDir using "DJIA" as the base of each file name
djiaSplit <- rxSplit(
  inData = xdfPath,
  outFilesBase = file.path(demoDir, "DJIA"),
  splitByFactor = "DayOfWeek"
)

# Check to see that the files have been created
list.files(demoDir)

# The djiaSplit object created contains that information, too.
djiaSplit

# It's a list...
class(djiaSplit)

# ... of RxXdfData objects (see ?RxXdfData)
class(djiaSplit[[1]])
# Since I have a list of the XDF files now, I can use lapply to iterate over
# each of them
lapply(djiaSplit, FUN = function(xdf) {
  rxGetInfo(xdf)
})

# Sorting is just an extension of that pattern. Each file is sorted by Date
# and written back over itself (outFile is the same object as inData, hence
# overwrite = TRUE).
lapply(djiaSplit, FUN = function(xdf) {
  rxSort(
    inData = xdf,
    outFile = xdf,
    sortByVars = "Date",
    overwrite = TRUE
  )
})
# Here's the function, straight from the StackOverflow post.
#
# lagVar() is meant to be passed as `transformFunc` to rxDataStep(), which
# calls it once per chunk of data.
#
# Argument:
#   dataList - list of columns for the current chunk.
#
# Free variables (not arguments) the function relies on:
#   varToLag, newName - names of the column to lag and of the column to
#     create; the caller supplies them through `transformObjects`.
#   .rxStartRow, .rxNumRows, .rxGet(), .rxSet() - provided by RevoScaleR
#     inside a transform function (see the rxTransform documentation):
#     the starting row and row count of the current chunk, and a getter /
#     setter for values that persist between chunks.
#
# Returns dataList with the extra column `newName`, holding the values of
# `varToLag` shifted down by one row.
lagVar <- function(dataList) {
  # The first lagged value of a chunk comes from the previous chunk; the
  # very first chunk has no predecessor, so it starts with NA.
  if (.rxStartRow == 1) {
    carriedValue <- NA
  } else {
    carriedValue <- .rxGet("lastValue")
  }

  # Shift down by one: prepend the carried value, drop the chunk's last row.
  dataList[[newName]] <- c(carriedValue, dataList[[varToLag]][-.rxNumRows])

  # Remember this chunk's last value so the next chunk can pick it up.
  .rxSet("lastValue", dataList[[varToLag]][.rxNumRows])

  dataList
}
# Now I'll apply the lagging function in the same way - I'll just wrap it
# with lapply(). transformObjects passes the two names that lagVar() reads
# as free variables: the column to lag and the name of the new column.
lapply(djiaSplit, FUN = function(xdf) {
  rxDataStep(
    inData = xdf,
    outFile = xdf,
    transformObjects = list(
      varToLag = "Open",
      newName = "previousOpen"
    ),
    transformFunc = lagVar,
    # append = "cols",
    overwrite = TRUE
  )
})

# Check the results: first 10 rows of the relevant columns from each file
lapply(djiaSplit, FUN = function(xdf) {
  rxDataStep(
    xdf,
    varsToKeep = c("Date", "Open", "previousOpen"),
    numRows = 10
  )
})
# Finally, if you want to recombine into a single XDF:
# Create a path for the new XDF (tempfile() only generates a name; the file
# itself does not exist until the first import writes it)
djiaFull <- tempfile(tmpdir = demoDir, fileext = ".xdf")

lapply(djiaSplit, FUN = function(xdf) {
  rxImport(
    inData = xdf,
    outFile = djiaFull,
    # The first import must create the file (append = "none"); every later
    # import appends its rows to the file that now exists (append = "rows").
    append = if (file.exists(djiaFull)) "rows" else "none"
  )
})

# Check the results
rxGetInfo(djiaFull, getVarInfo = TRUE, numRows = 10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment