Last active
April 5, 2018 08:24
-
-
Save daroczig/0f9da51f2d177394d3012c14360b11a8 to your computer and use it in GitHub Desktop.
data.table joins
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## generate transactional tables | |
set.seed(42) | |
library(data.table) | |
tx <- data.table( | |
item = sample(letters[1:3], 10, replace = TRUE), | |
time = as.POSIXct(as.Date('2016-01-01')) - runif(10) * 36*60^2, | |
amount = rpois(10, 25)) | |
prices <- data.table( | |
item = letters[1:3], | |
date = as.Date('2016-01-01') - 1:2, | |
price = as.vector(outer(c(100, 200, 300), c(1, 1.2)))) | |
items <- data.table( | |
item = letters[1:3], | |
color = c('red', 'white', 'red'), | |
weight = c(2, 4, 2.5)) | |
## ############################################################################# | |
## filtering and aggregates | |
## ############################################################################# | |
## TODO filter for the transaction with "b" items | |
## TODO filter for the transaction with less than 25 items | |
## TODO filter for the transaction with less then 25 "b" items | |
## TODO count the number of transactions for each items | |
## TODO count the number of transactions for each day | |
## TODO count the overall number of items sold on each day | |
## ############################################################################# | |
## joins | |
## ############################################################################# | |
## TODO join tx and items | |
## TODO count the number of transactions with "red" items | |
## TODO count the overall revenue per colors | |
## TODO count the overall weight of items sold on each day | |
## TODO create a frequency table on the number of sold items per color | |
## ############################################################################# | |
## wide and long tables | |
## ############################################################################# | |
## TODO create a frequency table on the number of sold items per color and date | |
## traditional crosstab | |
table(tx$color, tx$date) | |
## TODO recreate the same structure with data.table | |
## ############################################################################# | |
## rolling joins | |
## ############################################################################# | |
## TODO find the percentage of amount change between transactions | |
## TODO did you order the transactions by time first? | |
## TODO merge the purchase price for the sold items | |
purchases <- data.table( | |
item = letters[1:3], | |
date = as.Date('2016-01-01') - c(1, 10), | |
cost = as.vector(outer(c(100, 200, 300), c(0.5, 0.75)))) | |
## TODO rolling join on date | |
## ############################################################################# | |
## overlap joins | |
## ############################################################################# | |
## TODO count the number of transactions where another tx happened on the | |
## same item in the past 1 hour |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment