EconometricsBySimulation · April 14, 2016 21:37
diff --git a/ConsumptionInference.R b/ConsumptionInference.R
 # Estimating Weekly consumption from periodic purchasing data

 library(dplyr)
 library(data.table)
 library(reshape)
 library(tidyr)
 library(ggplot2)


 # Day Purchase
 nperiod <- c(1,2,3,5,6,7 ,8, 9, 10,  15, 20)
 pperiod <- c(3,3,3,2,1,1,.5,.5, .2,  .2, .1)
 ndays <- 250

 # Number of days observed
 obsperiod <- 7
  
 nsim <- 250

 # Other Periods
 op <- 5

 mpurch <- cpurch <- matrix(0, nrow=ndays, ncol=length(nperiod))
 # Mpurch is the quantity purchased
 # Cpurch is the number of days that quantity is purchased for

 for (i in 1:ndays) for (j in 1:length(nperiod)) if(sum(mpurch[1:i, j])<i) mpurch[i,j] <- nperiod[j]

 # The number of days expected to consume is equal to quantity purchased
 cpurch <- mpurch 


 #Imagine a random week selected from each purchasing pattern

 #Method 1 & 2 empty matrices
 m <- data.table(j=NA, op=NA, m=NA, x=NA)[-1,]

 for (z in 0:op) {
  
  mpurchz <- mpurch/(z+1)
  
  for (i in 1:nsim) {
  if (i/50==round(i/50)) print(i)
  
  day  <- sample(ndays-obsperiod-1,1)
  msamp  <- mpurchz[day:(day+obsperiod-1), ]
  csamp  <- cpurch[day:(day+obsperiod-1), ]

  for (j in 1:length(nperiod)) {

    draw <- NULL
    # create a combined consumption combining the consumption from the z items.
    # The first item is always for the j list while the remaining items are drawn randomly from the probility
    # distribution pperiod
    for (jj in 0:z) {
      if (jj==0) y <- j
      if (jj>0)  y <- sample(length(nperiod),1,prob=pperiod)
      draw <- rbind(draw, 
                cbind(prod=jj+1, 
                  day=1:obsperiod, 
                  quant=msamp[,y], 
                  perday=msamp[,y]/csamp[,y], 
                  dur=csamp[,y]))
    }

    draw <- draw %>% as.data.table
    
    # Calculate the probability adjusted consumption
    draw[dur<=obsperiod, perday2 := quant/dur]
    draw[dur>obsperiod,  perday2 := quant/obsperiod]

    # Expand the data so that consumption is spead over the next number of days
    expanded <- untable(draw,num=draw$dur) %>% as.data.table()
    
    # Increase the day counter
    expanded[, day2 := day+seq(.N)-1, by=.(prod,day)]
    
    # Method 1
    summed <- draw[!is.na(perday)] %>% group_by(day) %>% summarize(quant=sum(perday))
    m <- data.table(j=nperiod[j], op=z, m=1, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    # Method 2
    summed <- draw[!is.na(perday2)] %>% group_by(day) %>% summarize(quant=sum(perday2))
    m <- data.table(j=nperiod[j], op=z, m=2, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    # Method 3
    summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday))
    m <- data.table(j=nperiod[j], op=z, m=3, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    # Method 4
    # Day 3 is starting the day counter at 1 even if the first observation is on day 2 or more
    expanded[, day3 := day2-min(day2)+1]
    
    summed <- expanded[day3<=obsperiod  & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday))
    m <- data.table(j=nperiod[j], op=z, m=4, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    # Method 5
    summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2))
    m <- data.table(j=nperiod[j], op=z, m=5, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    summed <- expanded[day3<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2))
    m <- data.table(j=nperiod[j], op=z, m=6, x=summed[,quant] %>% mean) %>% rbind(m) 
    
    }
 }}

 options(dplyr.print_max = 1e9)

 m %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j)
 m %>% group_by(m,op,j) %>% summarise(mean=mean(x,na.rm=TRUE)) %>% arrange(m,op,j)

 sub0 <- function(x, y=0) {x[is.na(x), x := y] ; return(x)}

 results <- m %>% sub0 %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j)

 setwd('C:/Users/fsmar/Dropbox/Econometrics by Simulation/2016-04-April')

 results %>% spread(m,mean) %>% write.csv("Consumption.csv")

 # note in the write up method 4 is method 5 in the simulation
 # Methods 4 and 6 are excluded from the write up

 results <- results[m %in% c(1:3,5),]
 results[m==5,m:=4]
 png(file='ConsumptionSpread.png', width=1200, height=900)
 results %>% ggplot(aes(y=mean, x=j, color=factor(m))) + 
  geom_smooth(lwd=3) + 
  theme_bw(base_size = 15) +
  xlab('Good Consumption Period')
 dev.off()
	# Estimating Weekly consumption from periodic purchasing data

	library(dplyr)
	library(data.table)
	library(reshape)
	library(tidyr)
	library(ggplot2)


	# Day Purchase
	nperiod <- c(1,2,3,5,6,7 ,8, 9, 10, 15, 20)
	pperiod <- c(3,3,3,2,1,1,.5,.5, .2, .2, .1)
	ndays <- 250

	# Number of days observed
	obsperiod <- 7

	nsim <- 250

	# Other Periods
	op <- 5

	mpurch <- cpurch <- matrix(0, nrow=ndays, ncol=length(nperiod))
	# Mpurch is the quantity purchased
	# Cpurch is the number of days that quantity is purchased for

	for (i in 1:ndays) for (j in 1:length(nperiod)) if(sum(mpurch[1:i, j])<i) mpurch[i,j] <- nperiod[j]

	# The number of days expected to consume is equal to quantity purchased
	cpurch <- mpurch


	#Imagine a random week selected from each purchasing pattern

	#Method 1 & 2 empty matrices
	m <- data.table(j=NA, op=NA, m=NA, x=NA)[-1,]

	for (z in 0:op) {

	mpurchz <- mpurch/(z+1)

	for (i in 1:nsim) {
	if (i/50==round(i/50)) print(i)

	day <- sample(ndays-obsperiod-1,1)
	msamp <- mpurchz[day:(day+obsperiod-1), ]
	csamp <- cpurch[day:(day+obsperiod-1), ]

	for (j in 1:length(nperiod)) {

	draw <- NULL
	# create a combined consumption combining the consumption from the z items.
	# The first item is always for the j list while the remaining items are drawn randomly from the probility
	# distribution pperiod
	for (jj in 0:z) {
	if (jj==0) y <- j
	if (jj>0) y <- sample(length(nperiod),1,prob=pperiod)
	draw <- rbind(draw,
	cbind(prod=jj+1,
	day=1:obsperiod,
	quant=msamp[,y],
	perday=msamp[,y]/csamp[,y],
	dur=csamp[,y]))
	}

	draw <- draw %>% as.data.table

	# Calculate the probability adjusted consumption
	draw[dur<=obsperiod, perday2 := quant/dur]
	draw[dur>obsperiod, perday2 := quant/obsperiod]

	# Expand the data so that consumption is spead over the next number of days
	expanded <- untable(draw,num=draw$dur) %>% as.data.table()

	# Increase the day counter
	expanded[, day2 := day+seq(.N)-1, by=.(prod,day)]

	# Method 1
	summed <- draw[!is.na(perday)] %>% group_by(day) %>% summarize(quant=sum(perday))
	m <- data.table(j=nperiod[j], op=z, m=1, x=summed[,quant] %>% mean) %>% rbind(m)

	# Method 2
	summed <- draw[!is.na(perday2)] %>% group_by(day) %>% summarize(quant=sum(perday2))
	m <- data.table(j=nperiod[j], op=z, m=2, x=summed[,quant] %>% mean) %>% rbind(m)

	# Method 3
	summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday))
	m <- data.table(j=nperiod[j], op=z, m=3, x=summed[,quant] %>% mean) %>% rbind(m)

	# Method 4
	# Day 3 is starting the day counter at 1 even if the first observation is on day 2 or more
	expanded[, day3 := day2-min(day2)+1]

	summed <- expanded[day3<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday))
	m <- data.table(j=nperiod[j], op=z, m=4, x=summed[,quant] %>% mean) %>% rbind(m)

	# Method 5
	summed <- expanded[day2<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2))
	m <- data.table(j=nperiod[j], op=z, m=5, x=summed[,quant] %>% mean) %>% rbind(m)

	summed <- expanded[day3<=obsperiod & !is.na(perday)] %>% group_by(day2) %>% summarize(quant=sum(perday2))
	m <- data.table(j=nperiod[j], op=z, m=6, x=summed[,quant] %>% mean) %>% rbind(m)

	}
	}}

	options(dplyr.print_max = 1e9)

	m %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j)
	m %>% group_by(m,op,j) %>% summarise(mean=mean(x,na.rm=TRUE)) %>% arrange(m,op,j)

	sub0 <- function(x, y=0) {x[is.na(x), x := y] ; return(x)}

	results <- m %>% sub0 %>% group_by(m,op,j) %>% summarise(mean=mean(x)) %>% arrange(m,op,j)

	setwd('C:/Users/fsmar/Dropbox/Econometrics by Simulation/2016-04-April')

	results %>% spread(m,mean) %>% write.csv("Consumption.csv")

	# note in the write up method 4 is method 5 in the simulation
	# Methods 4 and 6 are excluded from the write up

	results <- results[m %in% c(1:3,5),]
	results[m==5,m:=4]
	png(file='ConsumptionSpread.png', width=1200, height=900)
	results %>% ggplot(aes(y=mean, x=j, color=factor(m))) +
	geom_smooth(lwd=3) +
	theme_bw(base_size = 15) +
	xlab('Good Consumption Period')
	dev.off()
No results found