Created
December 4, 2023 03:02
-
-
Save jamesbrandecon/12944aa4757e82a7997c2845172e5abb to your computer and use it in GitHub Desktop.
Basic implementation of Bayesian Logit and BLP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic implementation of Bayesian Logit and BLP | |
# In both cases, we model the observed market shares as coming from | |
# a logit model with measurement error. | |
using Plots | |
using StatsPlots | |
using DataFrames | |
using Turing, FillArrays | |
using LinearAlgebra, DynamicHMC | |
# Using Pkg | |
# Pkg.add(url = "https://github.com/jamesbrandecon/FRAC.jl", rev = "rewrite") | |
using Random, FRAC # Need FRAC for Halton draws (for random coefficients in BLP) | |
M = 500; # 200 markets | |
J = 1; # 1 inside option | |
beta_true = 2; # coefficient on X in utility function | |
me_sigma_true = 0.2; # measurement error sd | |
# ----------------------------------------------------------- | |
# Simulation functions | |
# ----------------------------------------------------------- | |
# Simulate market-level demand for a single good | |
# utility of outside option set to 0 | |
function sim_agg_logit(theta, sigma, M, J) | |
@assert J==1 | |
# Simulate data | |
X = rand(M,J); # product characteristics for M markets, J inside goods | |
s = zeros(M,J); | |
for m = 1:M | |
theta_i = theta .+ randn(10000,1) .*sigma; | |
u = theta_i.*X[m,:]; # Expected utility | |
s[m,:] = mean(exp.(u) ./ (1 .+ sum(exp.(u), dims=2)), dims=1); # Market shares | |
end | |
return s, X | |
end | |
# Adding in product-market demand shifters xi which are mildly correlated with price | |
function sim_agg_logit_with_endogeneity(theta, sigma, beta_endog, gamma_endog, price_sigma, M, J) | |
@assert J==1 | |
# Simulate data | |
IV = rand(M,J); # IV | |
s = zeros(M,J); | |
xi = randn(M,J); # market level shifters | |
price = beta_endog .*IV .+ gamma_endog .*xi .+ price_sigma.*randn(M,J); # price for M markets, J inside goods | |
for m = 1:M | |
theta_i = theta .+ randn(10000,1) .*sigma; | |
u = theta_i.*price[m,:] .+ xi[m,:]; # Expected utility | |
s[m,:] = mean(exp.(u) ./ (1 .+ sum(exp.(u), dims=2)), dims=1); # Market shares | |
end | |
return s, price, IV, xi | |
end | |
# ----------------------------------------------------------- | |
# Estimate market-level logit with measurement error | |
# ----------------------------------------------------------- | |
@model function estimate_logit_1good(s, x) | |
beta ~ Normal(0, 3) # Prior for utility parameter | |
sigma ~ InverseGamma(2, 3) # Prior for sd of (mean zero) measurement error | |
s_noME = exp.(x.* beta) ./ (1 .+ exp.(x .* beta)); | |
s ~ MvNormal(s_noME, sigma^2 .* FillArrays.I) | |
end | |
Random.seed!(12345) | |
# Simulate market shares and characteristics | |
s, x = sim_agg_logit(beta_true, 0.0, M, J) | |
# Add measurement error | |
s = s .+ randn(M,1) .* me_sigma_true; | |
# Estimation/sampling | |
chain = sample(estimate_logit_1good(s, dropdims(x, dims=2)), Turing.NUTS(), 5000) | |
# Plot results -- gets very close to the true values on average | |
plot(chain) | |
# ----------------------------------------------------------- | |
# Converted the original function to take in a dataframe | |
# Running the same check as above | |
# ----------------------------------------------------------- | |
@model function estimate_logit_1good_v2(df) | |
beta ~ Normal(0, 2) # Prior for utility parameter | |
sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error | |
s_noME = exp.(df.x.* beta) ./ (1 .+ exp.(df.x .* beta)); | |
df.s ~ MvNormal(s_noME, sigma^2 .* FillArrays.I) | |
end | |
df = DataFrame(s = dropdims(s, dims=2), x = dropdims(x, dims=2)) | |
chain = sample(estimate_logit_1good_v2(df), Turing.NUTS(), 5000) | |
plot(chain) | |
# ---------------------------------------------------------- | |
# Extend code to allow for more than one inside good and | |
# multiple characteristics in utility function | |
# (haven't tested with multiple goods though, this is just for fun) | |
# ---------------------------------------------------------- | |
@model function logit(df; linear::Vector{Symbol} = [:x]) | |
# Unpack and define priors | |
K1 = length(linear); | |
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(4, K1))) # Prior for utility parameter | |
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error | |
# Select data for characteristics in utility function | |
linear = Matrix(df[!, linear]); | |
# Calculate market shares at given parameters | |
df[!,"expu"] = dropdims(exp.(sum(linear .* beta', dims=2)), dims=2); | |
gdf = groupby(df, :market_ids); | |
cdf = combine(gdf, :expu => sum => :sumexpu); | |
df = leftjoin(df, cdf, on = :market_ids); | |
s_noME = df.expu ./ (1 .+ df.sumexpu) ; | |
# Market shares: pass to likelihood by adding measurement error | |
df.s ~ MvNormal(s_noME, me_sigma^2 .* FillArrays.I) | |
end | |
Random.seed!(12345) | |
J = 1; | |
M = 750; | |
# Simulate data, convert to dataframe | |
s, x = sim_agg_logit(beta_true, 0.0, M, J) | |
# Add measurement error | |
s = s .+ randn(M,J) .* me_sigma_true; | |
df = DataFrame(s = dropdims(s, dims=2), x = dropdims(x, dims=2)) | |
df[!,"market_ids"] = 1:size(df,1); | |
df[!,"x2"] .= rand(size(df,1)) | |
# Estimation | |
chain = sample(logit(df, linear = [:x, :x2]), Turing.NUTS(), 5000) | |
plot(chain) | |
# ------------------------------------------------------------------------------ | |
# Now trying to incorporate market level shifters (xi) | |
# For this, I'm using Shoshana Vasserman and Jim Savage's trick: | |
# - Treat eaxh xi as a parameter and model price endogeneity directly | |
# - Unlike standard BLP, where xi is calculated via a contraction mapping, | |
# here xi is identified in part by differences in market shares and in part by | |
# differences in prices across markets | |
# ------------------------------------------------------------------------------ | |
@model function estimate_logit_with_xi(df; linear::Vector{Symbol} = [:x], iv = :iv) | |
# Unpack and set priors | |
K1 = length(linear); | |
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, K1))) # Prior for utility parameter | |
endog_beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of X in price equation | |
endog_gamma ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of xi in price equation | |
price_sigma ~ InverseGamma(1, 3) # Prior for sd of log(price) | |
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error | |
xi ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(1, size(df,1)))) | |
# Prices: pass to likelihood by modeling endogeneity | |
pricemean = df[!,iv] .* endog_beta + xi .* endog_gamma; | |
df.price ~ MvNormal(pricemean, price_sigma^2 .* FillArrays.I) | |
# Calculate model-implied market shares | |
linear = Matrix(df[!, linear]); | |
xi = reshape(xi, size(df,1),1); | |
df[!,"expu"] = dropdims(exp.(sum(linear .* beta' + xi, dims=2)), dims=2); | |
gdf = groupby(df, :market_ids); | |
cdf = combine(gdf, :expu => sum => :sumexpu); | |
df = leftjoin(df, cdf, on = :market_ids); | |
s_noME = df.expu ./ (1 .+ df.sumexpu); # Market shares without measurement error | |
# Market shares: pass to likelihood by adding measurement error | |
df.s ~ MvNormal(s_noME, me_sigma^2 .* FillArrays.I) | |
end | |
# Testing the function above | |
Random.seed!(123321) | |
M=150; | |
price_sigma = 0.1; | |
endog_beta = 1; | |
endog_gamma = 0.1; | |
s, price, iv, xi = sim_agg_logit_with_endogeneity(beta_true, 0.0, endog_beta, endog_gamma, price_sigma, M, J) | |
s = s .+ randn(M,1) .* me_sigma_true; | |
df = DataFrame(s = dropdims(s, dims=2), | |
price = dropdims(price, dims=2), | |
iv = dropdims(iv, dims=2), xi = dropdims(xi, dims=2)) | |
df[!,"market_ids"] = 1:size(df,1); | |
Turing.setadbackend(:forwarddiff) | |
# Estimation | |
# Uncomment for multi-threads # chain = sample(logitWithShocks(df, linear = [:price], iv = :iv), Turing.NUTS(), MCMCThreads(), 2000, Threads.nthreads()) | |
chain = sample(estimate_logit_with_xi(df, linear = [:price], iv = :iv), Turing.NUTS(), 1000) | |
# Compare real xi to median posterior xi | |
scatter(df.xi, [median(chain["xi[$i]"]) for i = 1:M],label = false) | |
plot!(df.xi, df.xi, label = "45 degree line", legend = :topleft) | |
plot!(xlabel = "True Xi", ylabel = "Median Posterior Xi") | |
# -------------------------------------------------------------------- | |
# Finishing the puzzle: add a random coefficient to price | |
# -------------------------------------------------------------------- | |
@model function estimate_blp(df; linear::Vector{Symbol} = [:price], | |
nonlinear::Vector{Symbol} = [:price], | |
iv = :iv, | |
draws = randn(size(df,1),100)) | |
df_local = copy(df); | |
K1 = length(linear); | |
K2 = length(nonlinear); | |
N = size(df_local,1); | |
S = size(draws,2); | |
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(2, K1))) # Prior for utility parameter | |
# sigma ~ filldist(truncated(Normal(0,1), 0, Inf), K2) | |
sigma ~ filldist(InverseGamma(0.3,0.3), K2) | |
endog_beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of X in price equation | |
endog_gamma ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of xi in price equation | |
price_sigma ~ InverseGamma(1, 3) # Prior for sd of log(price) | |
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error | |
xi ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(1, size(df,1)))) | |
# Prices: pass to likelihood by modeling endogeneity | |
pricemean = df[!,iv] .* endog_beta + xi .* endog_gamma; | |
df.price ~ MvNormal(pricemean, price_sigma^2 .* FillArrays.I) | |
# Calculate market shares | |
linearmat = Matrix(df[!, linear]) + xi; | |
p = zeros(eltype(beta), N); | |
for i = 1:S | |
beta_i = reshape(beta, 1, K1); | |
beta_i = repeat(beta_i, N,1) + sigma .* draws[:,i]; | |
df_local[!,"expu"] = dropdims(exp.(sum(linearmat .* beta_i, dims=2)), dims=2); | |
gdf = groupby(df_local, :market_ids); | |
cdf = combine(gdf, :expu => sum => :sumexpu); | |
df_i = leftjoin(df_local, cdf, on = :market_ids); | |
p = p + df_i.expu ./(1 .+ df_i.sumexpu); | |
@assert (maximum(df_i.expu ./(1 .+ df_i.sumexpu)) <=1) | isinf(maximum(df_i.expu ./(1 .+ df_i.sumexpu))) | isnan(maximum(df_i.expu ./(1 .+ df_i.sumexpu))) | |
end | |
s_noME = p ./S; | |
# Market shares: pass to likelihood by adding measurement error | |
df.s ~ MvNormal(s_noME, me_sigma .* FillArrays.I) | |
end | |
# Testing | |
Random.seed!(123321) | |
rc_sigma = 0.5; | |
M = 100; | |
s, price, iv, xi = sim_agg_logit_with_endogeneity(beta_true, rc_sigma, endog_beta, endog_gamma, price_sigma, M, J) | |
s = s .+ randn(M,1) .* me_sigma_true; | |
df = DataFrame(s = dropdims(s, dims=2), | |
price = dropdims(price, dims=2), | |
iv = dropdims(iv, dims=2), xi = dropdims(xi, dims=2)) | |
df[!,"market_ids"] = 1:size(df,1); | |
Turing.setadbackend(:forwarddiff) | |
draws = randn(size(df,1),30); | |
base = 3; | |
FRAC.HaltonDraws!(draws, base, skip=1000, distr = Normal()) | |
# Uncomment for multi-thread # chain = sample(estimate_blp(df, linear = [:price], nonlinear = [:price], draws = draws), | |
# Turing.NUTS(), MCMCThreads(), 1000, 4) | |
chain = sample(estimate_blp(df, linear = [:price], nonlinear = [:price], draws = draws), | |
Turing.NUTS(),500) | |
# Plot results: | |
density(chain["sigma[1]"], label = false) # SD of random coefficient on price | |
vline!([rc_sigma], label = "True RC SD", legend = :topright) | |
plot!(xlabel = "Sigma", ylabel = "Density") | |
density(chain["beta[1]"], label = false) # mean of random coefficient on price | |
vline!([beta_true], label = "True Beta", legend = :topright) | |
plot!(xlabel = "Beta", ylabel = "Density") | |
scatter(df.xi, [median(chain["xi[$i]"]) for i = 1:M],label = false) # Compare real xi to median posterior xi | |
plot!(df.xi, df.xi, label = "45 degree line", legend = :topleft) | |
plot!(xlabel = "True Xi", ylabel = "Median Posterior Xi") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment