jalapic · March 8, 2015 18:42
diff --git a/expandrows_issue b/expandrows_issue
 ### Checking 'expandrows' function:

 ## Subsetted example dataset:

 mydf <- structure(list(Timestamp = c("2/25/2015 15:14:09", "2/25/2015 15:16:49", 
                             "2/25/2015 15:17:06", "2/25/2015 15:18:10", "2/25/2015 15:20:16", 
                             "2/25/2015 15:21:34", "3/1/2015 14:47:32", "3/1/2015 14:49:00", 
                             "3/1/2015 14:50:50", "3/1/2015 14:56:46", "3/1/2015 14:58:08"
 ), Actor = c("23", "15, 28", "6", "27", "23", "29", "30", "13", 
             "26", "13", "20"), Behavior = c("Fighting", "Fighting", "Chasing", 
                                             "Chasing", "Fighting, Chasing", "Chasing", "Chasing, Subordinate", 
                                             "Chasing", "Chasing", "Fighting", "Chasing"), Recipient = c("4", 
                                                                                                         "15, 28", "17", "22", "2", "26", "8", "16", "29", "6, 16", "3"
                                             )), .Names = c("Timestamp", "Actor", "Behavior", "Recipient"), row.names = c("7", 
                                                                                                                          "8", "9", "10", "11", "12", "271", "272", "273", "274", "275"
                                             ), class = "data.frame")




 mydf




 #### Option 1.  Using function loaded directly into global environment:


 #### This is Ananda Mahto's function based on answer here:
 #### http://stackoverflow.com/questions/25901851/duplicating-modifying-rows-of-a-dataframe-dependent-on-observations-r

 expandrows1 <- function(df){
  
  library(splitstackshape)  
  library(data.table)  
  
  
  temp <- cSplit(cSplit(cbind(id = 1:nrow(df), df),
                        "Actor", ",", "long"), 
                 "Recipient", ",", "long")
  

  ## Convert "Actor" and "Recipient" to numeric
  SD <- c("Actor", "Recipient")
  temp[, (SD) := lapply(.SD, as.numeric), .SDcols = SD]
  

  ## Sort Actors and Recipients, and check for duplicates and any points where Actors equal Recipients  
  temp[, toDrop := duplicated(
    paste(pmin(Actor, Recipient), pmax(Actor, Recipient))) |
      Actor == Recipient, by = id]
  
  ## Create your "score" column
  temp[, score := ifelse(any(toDrop), 0.5, 1), by = id]
  
  ## Subset and drop the irrelevant columns
  out <- temp[!temp[, toDrop, with = TRUE]][, toDrop := NULL]
  
  return(out)
 }

 expandrows1(mydf)  # works !

 #    id          Timestamp Actor             Behavior Recipient score
 #1:   1 2/25/2015 15:14:09    23             Fighting         4   1.0
 #2:   2 2/25/2015 15:16:49    15             Fighting        28   0.5
 #3:   3 2/25/2015 15:17:06     6              Chasing        17   1.0
 #4:   4 2/25/2015 15:18:10    27              Chasing        22   1.0
 #5:   5 2/25/2015 15:20:16    23    Fighting, Chasing         2   1.0
 #6:   6 2/25/2015 15:21:34    29              Chasing        26   1.0
 #7:   7  3/1/2015 14:47:32    30 Chasing, Subordinate         8   1.0
 #8:   8  3/1/2015 14:49:00    13              Chasing        16   1.0
 #9:   9  3/1/2015 14:50:50    26              Chasing        29   1.0
 #10: 10  3/1/2015 14:56:46    13             Fighting         6   1.0
 #11: 10  3/1/2015 14:56:46    13             Fighting        16   1.0
 #12: 11  3/1/2015 14:58:08    20              Chasing         3   1.0

 #although above does receive this warning:
 # *** NB: by=.EACHI is now explicit. See README to restore previous behaviour.



 #### Option 2.  Using 'curleylab' R package:

 # this only contains one function - 'expandrows'

 ### install:
 devtools::install_github('jalapic/curleylab')

 library(curleylab) #load

 expandrows(mydf)  ### does not work...

 ### ERROR MESSAGE: 

 #  Error in `:=`((SD), lapply(.SD, as.numeric)) : 
 #  Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").
	### Checking 'expandrows' function:

	## Subsetted example dataset:

	mydf <- structure(list(Timestamp = c("2/25/2015 15:14:09", "2/25/2015 15:16:49",
	"2/25/2015 15:17:06", "2/25/2015 15:18:10", "2/25/2015 15:20:16",
	"2/25/2015 15:21:34", "3/1/2015 14:47:32", "3/1/2015 14:49:00",
	"3/1/2015 14:50:50", "3/1/2015 14:56:46", "3/1/2015 14:58:08"
	), Actor = c("23", "15, 28", "6", "27", "23", "29", "30", "13",
	"26", "13", "20"), Behavior = c("Fighting", "Fighting", "Chasing",
	"Chasing", "Fighting, Chasing", "Chasing", "Chasing, Subordinate",
	"Chasing", "Chasing", "Fighting", "Chasing"), Recipient = c("4",
	"15, 28", "17", "22", "2", "26", "8", "16", "29", "6, 16", "3"
	)), .Names = c("Timestamp", "Actor", "Behavior", "Recipient"), row.names = c("7",
	"8", "9", "10", "11", "12", "271", "272", "273", "274", "275"
	), class = "data.frame")




	mydf




	#### Option 1. Using function loaded directly into global environment:


	#### This is Ananda Mahto's function based on answer here:
	#### http://stackoverflow.com/questions/25901851/duplicating-modifying-rows-of-a-dataframe-dependent-on-observations-r

	expandrows1 <- function(df){

	library(splitstackshape)
	library(data.table)


	temp <- cSplit(cSplit(cbind(id = 1:nrow(df), df),
	"Actor", ",", "long"),
	"Recipient", ",", "long")


	## Convert "Actor" and "Recipient" to numeric
	SD <- c("Actor", "Recipient")
	temp[, (SD) := lapply(.SD, as.numeric), .SDcols = SD]


	## Sort Actors and Recipients, and check for duplicates and any points where Actors equal Recipients
	temp[, toDrop := duplicated(
	paste(pmin(Actor, Recipient), pmax(Actor, Recipient))) \|
	Actor == Recipient, by = id]

	## Create your "score" column
	temp[, score := ifelse(any(toDrop), 0.5, 1), by = id]

	## Subset and drop the irrelevant columns
	out <- temp[!temp[, toDrop, with = TRUE]][, toDrop := NULL]

	return(out)
	}

	expandrows1(mydf) # works !

	# id Timestamp Actor Behavior Recipient score
	#1: 1 2/25/2015 15:14:09 23 Fighting 4 1.0
	#2: 2 2/25/2015 15:16:49 15 Fighting 28 0.5
	#3: 3 2/25/2015 15:17:06 6 Chasing 17 1.0
	#4: 4 2/25/2015 15:18:10 27 Chasing 22 1.0
	#5: 5 2/25/2015 15:20:16 23 Fighting, Chasing 2 1.0
	#6: 6 2/25/2015 15:21:34 29 Chasing 26 1.0
	#7: 7 3/1/2015 14:47:32 30 Chasing, Subordinate 8 1.0
	#8: 8 3/1/2015 14:49:00 13 Chasing 16 1.0
	#9: 9 3/1/2015 14:50:50 26 Chasing 29 1.0
	#10: 10 3/1/2015 14:56:46 13 Fighting 6 1.0
	#11: 10 3/1/2015 14:56:46 13 Fighting 16 1.0
	#12: 11 3/1/2015 14:58:08 20 Chasing 3 1.0

	#although above does receive this warning:
	# *** NB: by=.EACHI is now explicit. See README to restore previous behaviour.



	#### Option 2. Using 'curleylab' R package:

	# this only contains one function - 'expandrows'

	### install:
	devtools::install_github('jalapic/curleylab')

	library(curleylab) #load

	expandrows(mydf) ### does not work...

	### ERROR MESSAGE:

	# Error in `:=`((SD), lapply(.SD, as.numeric)) :
	# Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").