Created
March 8, 2015 18:42
-
-
Save jalapic/4e0b26bd74d3b3751d7e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Checking 'expandrows' function: | |
## Subsetted example dataset: | |
mydf <- structure(list(Timestamp = c("2/25/2015 15:14:09", "2/25/2015 15:16:49", | |
"2/25/2015 15:17:06", "2/25/2015 15:18:10", "2/25/2015 15:20:16", | |
"2/25/2015 15:21:34", "3/1/2015 14:47:32", "3/1/2015 14:49:00", | |
"3/1/2015 14:50:50", "3/1/2015 14:56:46", "3/1/2015 14:58:08" | |
), Actor = c("23", "15, 28", "6", "27", "23", "29", "30", "13", | |
"26", "13", "20"), Behavior = c("Fighting", "Fighting", "Chasing", | |
"Chasing", "Fighting, Chasing", "Chasing", "Chasing, Subordinate", | |
"Chasing", "Chasing", "Fighting", "Chasing"), Recipient = c("4", | |
"15, 28", "17", "22", "2", "26", "8", "16", "29", "6, 16", "3" | |
)), .Names = c("Timestamp", "Actor", "Behavior", "Recipient"), row.names = c("7", | |
"8", "9", "10", "11", "12", "271", "272", "273", "274", "275" | |
), class = "data.frame") | |
mydf | |
#### Option 1. Using function loaded directly into global environment: | |
#### This is Ananda Mahto's function based on answer here: | |
#### http://stackoverflow.com/questions/25901851/duplicating-modifying-rows-of-a-dataframe-dependent-on-observations-r | |
expandrows1 <- function(df){ | |
library(splitstackshape) | |
library(data.table) | |
temp <- cSplit(cSplit(cbind(id = 1:nrow(df), df), | |
"Actor", ",", "long"), | |
"Recipient", ",", "long") | |
## Convert "Actor" and "Recipient" to numeric | |
SD <- c("Actor", "Recipient") | |
temp[, (SD) := lapply(.SD, as.numeric), .SDcols = SD] | |
## Sort Actors and Recipients, and check for duplicates and any points where Actors equal Recipients | |
temp[, toDrop := duplicated( | |
paste(pmin(Actor, Recipient), pmax(Actor, Recipient))) | | |
Actor == Recipient, by = id] | |
## Create your "score" column | |
temp[, score := ifelse(any(toDrop), 0.5, 1), by = id] | |
## Subset and drop the irrelevant columns | |
out <- temp[!temp[, toDrop, with = TRUE]][, toDrop := NULL] | |
return(out) | |
} | |
expandrows1(mydf) # works ! | |
# id Timestamp Actor Behavior Recipient score | |
#1: 1 2/25/2015 15:14:09 23 Fighting 4 1.0 | |
#2: 2 2/25/2015 15:16:49 15 Fighting 28 0.5 | |
#3: 3 2/25/2015 15:17:06 6 Chasing 17 1.0 | |
#4: 4 2/25/2015 15:18:10 27 Chasing 22 1.0 | |
#5: 5 2/25/2015 15:20:16 23 Fighting, Chasing 2 1.0 | |
#6: 6 2/25/2015 15:21:34 29 Chasing 26 1.0 | |
#7: 7 3/1/2015 14:47:32 30 Chasing, Subordinate 8 1.0 | |
#8: 8 3/1/2015 14:49:00 13 Chasing 16 1.0 | |
#9: 9 3/1/2015 14:50:50 26 Chasing 29 1.0 | |
#10: 10 3/1/2015 14:56:46 13 Fighting 6 1.0 | |
#11: 10 3/1/2015 14:56:46 13 Fighting 16 1.0 | |
#12: 11 3/1/2015 14:58:08 20 Chasing 3 1.0 | |
#although above does receive this warning: | |
# *** NB: by=.EACHI is now explicit. See README to restore previous behaviour. | |
#### Option 2. Using 'curleylab' R package: | |
# this only contains one function - 'expandrows' | |
### install: | |
devtools::install_github('jalapic/curleylab') | |
library(curleylab) #load | |
expandrows(mydf) ### does not work... | |
### ERROR MESSAGE: | |
# Error in `:=`((SD), lapply(.SD, as.numeric)) : | |
# Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":="). | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment