Skip to content

Instantly share code, notes, and snippets.

@jamesbrandecon
Created December 4, 2023 03:02
Show Gist options
  • Save jamesbrandecon/12944aa4757e82a7997c2845172e5abb to your computer and use it in GitHub Desktop.
Save jamesbrandecon/12944aa4757e82a7997c2845172e5abb to your computer and use it in GitHub Desktop.
Basic implementation of Bayesian Logit and BLP
# Basic implementation of Bayesian Logit and BLP
# In both cases, we model the observed market shares as coming from
# a logit model with measurement error.
using Plots
using StatsPlots
using DataFrames
using Turing, FillArrays
using LinearAlgebra, DynamicHMC
# Using Pkg
# Pkg.add(url = "https://github.com/jamesbrandecon/FRAC.jl", rev = "rewrite")
using Random, FRAC # Need FRAC for Halton draws (for random coefficients in BLP)
M = 500; # 200 markets
J = 1; # 1 inside option
beta_true = 2; # coefficient on X in utility function
me_sigma_true = 0.2; # measurement error sd
# -----------------------------------------------------------
# Simulation functions
# -----------------------------------------------------------
# Simulate market-level demand for a single good
# utility of outside option set to 0
function sim_agg_logit(theta, sigma, M, J)
@assert J==1
# Simulate data
X = rand(M,J); # product characteristics for M markets, J inside goods
s = zeros(M,J);
for m = 1:M
theta_i = theta .+ randn(10000,1) .*sigma;
u = theta_i.*X[m,:]; # Expected utility
s[m,:] = mean(exp.(u) ./ (1 .+ sum(exp.(u), dims=2)), dims=1); # Market shares
end
return s, X
end
# Adding in product-market demand shifters xi which are mildly correlated with price
function sim_agg_logit_with_endogeneity(theta, sigma, beta_endog, gamma_endog, price_sigma, M, J)
@assert J==1
# Simulate data
IV = rand(M,J); # IV
s = zeros(M,J);
xi = randn(M,J); # market level shifters
price = beta_endog .*IV .+ gamma_endog .*xi .+ price_sigma.*randn(M,J); # price for M markets, J inside goods
for m = 1:M
theta_i = theta .+ randn(10000,1) .*sigma;
u = theta_i.*price[m,:] .+ xi[m,:]; # Expected utility
s[m,:] = mean(exp.(u) ./ (1 .+ sum(exp.(u), dims=2)), dims=1); # Market shares
end
return s, price, IV, xi
end
# -----------------------------------------------------------
# Estimate market-level logit with measurement error
# -----------------------------------------------------------
@model function estimate_logit_1good(s, x)
beta ~ Normal(0, 3) # Prior for utility parameter
sigma ~ InverseGamma(2, 3) # Prior for sd of (mean zero) measurement error
s_noME = exp.(x.* beta) ./ (1 .+ exp.(x .* beta));
s ~ MvNormal(s_noME, sigma^2 .* FillArrays.I)
end
Random.seed!(12345)
# Simulate market shares and characteristics
s, x = sim_agg_logit(beta_true, 0.0, M, J)
# Add measurement error
s = s .+ randn(M,1) .* me_sigma_true;
# Estimation/sampling
chain = sample(estimate_logit_1good(s, dropdims(x, dims=2)), Turing.NUTS(), 5000)
# Plot results -- gets very close to the true values on average
plot(chain)
# -----------------------------------------------------------
# Converted the original function to take in a dataframe
# Running the same check as above
# -----------------------------------------------------------
@model function estimate_logit_1good_v2(df)
beta ~ Normal(0, 2) # Prior for utility parameter
sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error
s_noME = exp.(df.x.* beta) ./ (1 .+ exp.(df.x .* beta));
df.s ~ MvNormal(s_noME, sigma^2 .* FillArrays.I)
end
df = DataFrame(s = dropdims(s, dims=2), x = dropdims(x, dims=2))
chain = sample(estimate_logit_1good_v2(df), Turing.NUTS(), 5000)
plot(chain)
# ----------------------------------------------------------
# Extend code to allow for more than one inside good and
# multiple characteristics in utility function
# (haven't tested with multiple goods though, this is just for fun)
# ----------------------------------------------------------
@model function logit(df; linear::Vector{Symbol} = [:x])
# Unpack and define priors
K1 = length(linear);
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(4, K1))) # Prior for utility parameter
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error
# Select data for characteristics in utility function
linear = Matrix(df[!, linear]);
# Calculate market shares at given parameters
df[!,"expu"] = dropdims(exp.(sum(linear .* beta', dims=2)), dims=2);
gdf = groupby(df, :market_ids);
cdf = combine(gdf, :expu => sum => :sumexpu);
df = leftjoin(df, cdf, on = :market_ids);
s_noME = df.expu ./ (1 .+ df.sumexpu) ;
# Market shares: pass to likelihood by adding measurement error
df.s ~ MvNormal(s_noME, me_sigma^2 .* FillArrays.I)
end
Random.seed!(12345)
J = 1;
M = 750;
# Simulate data, convert to dataframe
s, x = sim_agg_logit(beta_true, 0.0, M, J)
# Add measurement error
s = s .+ randn(M,J) .* me_sigma_true;
df = DataFrame(s = dropdims(s, dims=2), x = dropdims(x, dims=2))
df[!,"market_ids"] = 1:size(df,1);
df[!,"x2"] .= rand(size(df,1))
# Estimation
chain = sample(logit(df, linear = [:x, :x2]), Turing.NUTS(), 5000)
plot(chain)
# ------------------------------------------------------------------------------
# Now trying to incorporate market level shifters (xi)
# For this, I'm using Shoshana Vasserman and Jim Savage's trick:
# - Treat eaxh xi as a parameter and model price endogeneity directly
# - Unlike standard BLP, where xi is calculated via a contraction mapping,
# here xi is identified in part by differences in market shares and in part by
# differences in prices across markets
# ------------------------------------------------------------------------------
@model function estimate_logit_with_xi(df; linear::Vector{Symbol} = [:x], iv = :iv)
# Unpack and set priors
K1 = length(linear);
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, K1))) # Prior for utility parameter
endog_beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of X in price equation
endog_gamma ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of xi in price equation
price_sigma ~ InverseGamma(1, 3) # Prior for sd of log(price)
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error
xi ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(1, size(df,1))))
# Prices: pass to likelihood by modeling endogeneity
pricemean = df[!,iv] .* endog_beta + xi .* endog_gamma;
df.price ~ MvNormal(pricemean, price_sigma^2 .* FillArrays.I)
# Calculate model-implied market shares
linear = Matrix(df[!, linear]);
xi = reshape(xi, size(df,1),1);
df[!,"expu"] = dropdims(exp.(sum(linear .* beta' + xi, dims=2)), dims=2);
gdf = groupby(df, :market_ids);
cdf = combine(gdf, :expu => sum => :sumexpu);
df = leftjoin(df, cdf, on = :market_ids);
s_noME = df.expu ./ (1 .+ df.sumexpu); # Market shares without measurement error
# Market shares: pass to likelihood by adding measurement error
df.s ~ MvNormal(s_noME, me_sigma^2 .* FillArrays.I)
end
# Testing the function above
Random.seed!(123321)
M=150;
price_sigma = 0.1;
endog_beta = 1;
endog_gamma = 0.1;
s, price, iv, xi = sim_agg_logit_with_endogeneity(beta_true, 0.0, endog_beta, endog_gamma, price_sigma, M, J)
s = s .+ randn(M,1) .* me_sigma_true;
df = DataFrame(s = dropdims(s, dims=2),
price = dropdims(price, dims=2),
iv = dropdims(iv, dims=2), xi = dropdims(xi, dims=2))
df[!,"market_ids"] = 1:size(df,1);
Turing.setadbackend(:forwarddiff)
# Estimation
# Uncomment for multi-threads # chain = sample(logitWithShocks(df, linear = [:price], iv = :iv), Turing.NUTS(), MCMCThreads(), 2000, Threads.nthreads())
chain = sample(estimate_logit_with_xi(df, linear = [:price], iv = :iv), Turing.NUTS(), 1000)
# Compare real xi to median posterior xi
scatter(df.xi, [median(chain["xi[$i]"]) for i = 1:M],label = false)
plot!(df.xi, df.xi, label = "45 degree line", legend = :topleft)
plot!(xlabel = "True Xi", ylabel = "Median Posterior Xi")
# --------------------------------------------------------------------
# Finishing the puzzle: add a random coefficient to price
# --------------------------------------------------------------------
@model function estimate_blp(df; linear::Vector{Symbol} = [:price],
nonlinear::Vector{Symbol} = [:price],
iv = :iv,
draws = randn(size(df,1),100))
df_local = copy(df);
K1 = length(linear);
K2 = length(nonlinear);
N = size(df_local,1);
S = size(draws,2);
beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(2, K1))) # Prior for utility parameter
# sigma ~ filldist(truncated(Normal(0,1), 0, Inf), K2)
sigma ~ filldist(InverseGamma(0.3,0.3), K2)
endog_beta ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of X in price equation
endog_gamma ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(3, 1))) # Prior for coefficient of xi in price equation
price_sigma ~ InverseGamma(1, 3) # Prior for sd of log(price)
me_sigma ~ InverseGamma(1, 3) # Prior for sd of (mean zero) measurement error
xi ~ MvNormal(LinearAlgebra.Diagonal(FillArrays.Fill(1, size(df,1))))
# Prices: pass to likelihood by modeling endogeneity
pricemean = df[!,iv] .* endog_beta + xi .* endog_gamma;
df.price ~ MvNormal(pricemean, price_sigma^2 .* FillArrays.I)
# Calculate market shares
linearmat = Matrix(df[!, linear]) + xi;
p = zeros(eltype(beta), N);
for i = 1:S
beta_i = reshape(beta, 1, K1);
beta_i = repeat(beta_i, N,1) + sigma .* draws[:,i];
df_local[!,"expu"] = dropdims(exp.(sum(linearmat .* beta_i, dims=2)), dims=2);
gdf = groupby(df_local, :market_ids);
cdf = combine(gdf, :expu => sum => :sumexpu);
df_i = leftjoin(df_local, cdf, on = :market_ids);
p = p + df_i.expu ./(1 .+ df_i.sumexpu);
@assert (maximum(df_i.expu ./(1 .+ df_i.sumexpu)) <=1) | isinf(maximum(df_i.expu ./(1 .+ df_i.sumexpu))) | isnan(maximum(df_i.expu ./(1 .+ df_i.sumexpu)))
end
s_noME = p ./S;
# Market shares: pass to likelihood by adding measurement error
df.s ~ MvNormal(s_noME, me_sigma .* FillArrays.I)
end
# Testing
Random.seed!(123321)
rc_sigma = 0.5;
M = 100;
s, price, iv, xi = sim_agg_logit_with_endogeneity(beta_true, rc_sigma, endog_beta, endog_gamma, price_sigma, M, J)
s = s .+ randn(M,1) .* me_sigma_true;
df = DataFrame(s = dropdims(s, dims=2),
price = dropdims(price, dims=2),
iv = dropdims(iv, dims=2), xi = dropdims(xi, dims=2))
df[!,"market_ids"] = 1:size(df,1);
Turing.setadbackend(:forwarddiff)
draws = randn(size(df,1),30);
base = 3;
FRAC.HaltonDraws!(draws, base, skip=1000, distr = Normal())
# Uncomment for multi-thread # chain = sample(estimate_blp(df, linear = [:price], nonlinear = [:price], draws = draws),
# Turing.NUTS(), MCMCThreads(), 1000, 4)
chain = sample(estimate_blp(df, linear = [:price], nonlinear = [:price], draws = draws),
Turing.NUTS(),500)
# Plot results:
density(chain["sigma[1]"], label = false) # SD of random coefficient on price
vline!([rc_sigma], label = "True RC SD", legend = :topright)
plot!(xlabel = "Sigma", ylabel = "Density")
density(chain["beta[1]"], label = false) # mean of random coefficient on price
vline!([beta_true], label = "True Beta", legend = :topright)
plot!(xlabel = "Beta", ylabel = "Density")
scatter(df.xi, [median(chain["xi[$i]"]) for i = 1:M],label = false) # Compare real xi to median posterior xi
plot!(df.xi, df.xi, label = "45 degree line", legend = :topleft)
plot!(xlabel = "True Xi", ylabel = "Median Posterior Xi")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment