Last active
April 14, 2016 21:37
-
-
Save EconometricsBySimulation/861ee0e81909b9cc1bd9ca4b14c2dbc7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Estimating Weekly consumption from periodic purchasing data | |
library(dplyr) | |
library(data.table) | |
library(reshape) | |
library(tidyr) | |
library(ggplot2) | |
# Day Purchase | |
nperiod <- c(1,2,3,5,6,7 ,8, 9, 10, 15, 20) | |
pperiod <- c(3,3,3,2,1,1,.5,.5, .2, .2, .1) | |
ndays <- 250 | |
# Number of days observed | |
obsperiod <- 7 | |
nsim <- 250 | |
# Other Periods | |
op <- 5 | |
mpurch <- cpurch <- matrix(0, nrow=ndays, ncol=length(nperiod)) | |
# Mpurch is the quantity purchased | |
# Cpurch is the number of days that quantity is purchased for | |
for (i in 1:ndays) for (j in 1:length(nperiod)) if(sum(mpurch[1:i, j])<i) mpurch[i,j] <- nperiod[j] | |
# The number of days expected to consume is equal to quantity purchased | |
cpurch <- mpurch | |
#Imagine a random week selected from each purchasing pattern | |
#Method 1 & 2 empty matrices | |
m <- data.table(j=NA, op=NA, m=NA, x=NA)[-1,] | |
for (z in 0:op) { | |
mpurchz <- mpurch/(z+1) | |
for (i in 1:nsim) { | |
if (i/50==round(i/50)) print(i) | |
day <- sample(ndays-obsperiod-1,1) | |
msamp <- mpurchz[day:(day+obsperiod-1), ] | |
csamp <- cpurch[day:(day+obsperiod-1), ] | |
for (j in 1:length(nperiod)) { | |
draw <- NULL | |
# create a combined consumption combining the consumption from the z items. | |
# The first item is always for the j list while the remaining items are drawn randomly from the probility | |
# distribution pperiod | |
for (jj in 0:z) { | |
if (jj==0) y <- j | |
if (jj>0) y <- sample(length(nperiod),1,prob=pperiod) | |
draw <- rbind(draw, | |
cbind(prod=jj+1, | |
day=1:obsperiod, | |
quant=msamp[,y], | |
perday=msamp[,y]/csamp[,y], | |
dur=csamp[,y])) | |
} | |
draw <- draw %>% as.data.table | |
# Calculate the probability adjusted consumption | |
draw[dur<=obsperiod, perday2 := quant/dur] | |
draw[dur>obsperiod, perday2 := quant/obsperiod] | |
# Expand the data so that consumption is spead over the next number of days | |
expanded <- untable(draw,num=draw$dur) %>% as.data.table() | |
# Increase the day counter | |
expanded[, day2 := day+seq(.N)-1, by=.(prod,day)] | |
# Method 1 | |
summed <- draw[!is.na(perday)] %>% group_by(day) %>% summarize(quant=sum(perday)) | |
m <- data.table(j=nperiod[j], op=z, m=1, x=summed[,quant] %>% mean) %>% rbind(m) | |
# Method 2 | |
summed <- draw[!is.na(perday2)] %>% group_by(day) %>% summarize(quant=sum(perday2)) | |
m <- data.table(j=nperiod[j], op=z, m=2, x=summed[,quant] %>% mean) %>% rbind(m) | |
# Method 3 | |
summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday)) | |
m <- data.table(j=nperiod[j], op=z, m=3, x=summed[,quant] %>% mean) %>% rbind(m) | |
# Method 4 | |
# Day 3 is starting the day counter at 1 even if the first observation is on day 2 or more | |
expanded[, day3 := day2-min(day2)+1] | |
summed <- expanded[day3<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday)) | |
m <- data.table(j=nperiod[j], op=z, m=4, x=summed[,quant] %>% mean) %>% rbind(m) | |
# Method 5 | |
summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2)) | |
m <- data.table(j=nperiod[j], op=z, m=5, x=summed[,quant] %>% mean) %>% rbind(m) | |
summed <- expanded[day3<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2)) | |
m <- data.table(j=nperiod[j], op=z, m=6, x=summed[,quant] %>% mean) %>% rbind(m) | |
} | |
}} | |
options(dplyr.print_max = 1e9) | |
m %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j) | |
m %>% group_by(m,op,j) %>% summarise(mean=mean(x,na.rm=TRUE)) %>% arrange(m,op,j) | |
sub0 <- function(x, y=0) {x[is.na(x), x := y] ; return(x)} | |
results <- m %>% sub0 %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j) | |
setwd('C:/Users/fsmar/Dropbox/Econometrics by Simulation/2016-04-April') | |
results %>% spread(m,mean) %>% write.csv("Consumption.csv") | |
# note in the write up method 4 is method 5 in the simulation | |
# Methods 4 and 6 are excluded from the write up | |
results <- results[m %in% c(1:3,5),] | |
results[m==5,m:=4] | |
png(file='ConsumptionSpread.png', width=1200, height=900) | |
results %>% ggplot(aes(y=mean, x=j, color=factor(m))) + | |
geom_smooth(lwd=3) + | |
theme_bw(base_size = 15) + | |
xlab('Good Consumption Period') | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment