Created
November 24, 2023 20:03
-
-
Save jamesbrandecon/b72b29f184daa1ec2493588607180160 to your computer and use it in GitHub Desktop.
A one-file implementation of Fox, Kim, Ryan, Bajari (2011)'s linear estimator for the mixed logit demand model, in the style of FRAC.jl and NPDemand.jl.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file contains a basic implementation of the FKRB estimator for the random coefficients logit model in Julia. | |
# In particular, it implements a constrained elastic net version of the estimator with aggregate data. | |
# -- Fox, Kim, Ryan, and Bajari (2011) https://onlinelibrary.wiley.com/doi/abs/10.3982/QE49 | |
# Written by James Brand, 2023 | |
# The main components of the code include: | |
# - The `logexpratio` function, which calculates market shares for a given parameter vector. | |
# - The `FKRBProblem` struct, which represents the FKRB problem and stores relevant data and results. | |
# - The `define_problem` function, which defines the FKRB problem based on input data and variables. | |
# - The `generate_regressors_aggregate` function, which generates regressors for the version of the approach that uses aggregate data. | |
# - The `make_grid_points` function, which generates a grid of points for evaluating the FKRB model. | |
# - The `elasticnet` function, which performs elastic net regularization for linear regression. | |
# The code is only meant to serve as an example, not as a production-ready implementation. | |
# I've tried to write it in the style of my packages FRAC.jl and NPDemand.jl, but have not tested it beyond what is shown here | |
using CSV, DataFrames, LinearAlgebra | |
using SCS, Convex, Distributions | |
using FRAC, Plots | |
import FRAC.define_problem # Going to extend the define_problem function from FRAC.jl | |
# FRAC installation: Pkg.add(url = "https://github.com/jamesbrandecon/FRAC.jl", rev = "rewrite") | |
logexpratio(x, β) = exp.(x * β) ./ (1 .+ sum(exp.(x * β))); | |
mutable struct FKRBProblem | |
data | |
linear # Array{String,1} | |
nonlinear # Array{String,1} | |
iv # Array{String,1} | |
grid_points # Matrix{Float,2} -- (G,K) each row is a K-dimensional point, where K = length(linear) + length(nonlinear) | |
results # Vector of weights of length G | |
train # Vector{Int} -- indices of the training data in data -- not implemented | |
end | |
""" | |
elasticnet(Y, X, γ, λ = 0) | |
Performs elastic net regularization for linear regression. | |
Code modified from Convex.jl documentation: https://jump.dev/Convex.jl/stable/examples/general_examples/lasso_regression/ | |
""" | |
function elasticnet(Y, X, γ, λ = 0) | |
(T, K) = (size(X, 1), size(X, 2)) | |
X = Matrix(X); | |
Y = Vector(Y); | |
b_ls = X \ Y #LS estimate of weights, no restrictions | |
Q = X'X / T | |
c = X'Y / T #c'b = Y'X*b | |
b = Variable(K) #define variables to optimize over | |
L1 = quadform(b, Q) #b'Q*b | |
L2 = dot(c, b) #c'b | |
L3 = norm(b, 1) #sum(|b|) | |
L4 = sumsquares(b) #sum(b^2) | |
# define constraints | |
# Nonnegativity | |
constraints = [b >= 0] | |
# Sum of coefficients == 1 | |
constraints = [constraints; sum(b) == 1] | |
if λ > 0 | |
Sol = minimize(L1 - 2 * L2 + γ * L3 + λ * L4, constraints...) #u'u/T + γ*sum(|b|) + λ*sum(b^2), where u = Y-Xb | |
else | |
Sol = minimize(L1 - 2 * L2 + γ * L3, constraints...) #u'u/T + γ*sum(|b|) where u = Y-Xb | |
end | |
solve!(Sol, SCS.Optimizer; silent_solver = false) | |
Sol.status == Convex.MOI.OPTIMAL ? b_i = vec(evaluate(b)) : b_i = NaN | |
return b_i, b_ls | |
end | |
""" | |
make_grid_points(data, linear, nonlinear) | |
Generates a grid of points for evaluating the FKRB model. Currently a very simple implementation, though easy to | |
extend to more complex grids. | |
""" | |
function make_grid_points(data, linear, nonlinear) | |
all_vars = union(linear, nonlinear); | |
# Super simple construction: | |
# range between -3 and 3, with 0.1 increments | |
# Flipped for second variable to force price coefficient to be negative | |
# NOTE: structure of grid_points bakes in correlation structure! | |
# I found that small changes here made a big difference. For full impelementation, careful grid selection is important. | |
grid_step = 0.05; | |
univariate_grid = collect(-3:grid_step:3); | |
univariate_grid_reversed = collect(3:(-1*grid_step):-3); | |
grid_points = repeat(univariate_grid, 1, length(all_vars)); | |
grid_points[:,2] = univariate_grid_reversed; | |
# Could also run FRAC on the data to get initial distribution of preferences | |
return grid_points | |
end | |
""" | |
define_problem(;method = "FKRB", data=[], linear=[], nonlinear=[], iv=[], train=[]) | |
Defines the FKRB problem, which is just a container for the data and results. | |
""" | |
function define_problem(;method = "FKRB", data=[], linear=[], nonlinear=[], iv=[], train=[]) | |
# Any checks of the inputs | |
# Are linear/nonlinear/iv all vectors of strings | |
@assert eltype(linear) <: AbstractString | |
@assert eltype(nonlinear) <: AbstractString | |
@assert eltype(iv) <: AbstractString | |
# Are the variables in linear/nonlinear/iv all in data? | |
@assert all([x ∈ names(data) for x ∈ linear]) | |
@assert all([x ∈ names(data) for x ∈ nonlinear]) | |
@assert all([x ∈ names(data) for x ∈ iv]) | |
println("Note: argument `iv' is not used in this version of the code") | |
grid_points = make_grid_points(data, linear, nonlinear); | |
# Return the problem | |
problem = FKRBProblem(data, linear, nonlinear, iv, grid_points, [], train) | |
return problem | |
end | |
""" | |
generate_regressors_aggregate(problem::FKRBProblem) | |
Generates the RHS regressors for the version of the FKRB estimator that uses aggregate data. | |
""" | |
function generate_regressors_aggregate(problem) | |
# Unpack | |
data = problem.data | |
linear = problem.linear; | |
nonlinear = problem.nonlinear; | |
all_vars = union(linear, nonlinear); | |
grid_points = problem.grid_points; | |
# Generate RHS regressors -- each term is a logit-form market share, calculated at a given grid point of parameters | |
regressors = Array{Float64}(undef, size(data, 1), size(grid_points, 1)); | |
for m ∈ sort(unique(data.market_ids)) | |
i = 1; | |
for g ∈ eachrow(grid_points) | |
X_m = Matrix(data[data.market_ids .== m, all_vars]); | |
regressors[data.market_ids .== m, i] = logexpratio(X_m, g); | |
i +=1; | |
end | |
end | |
return regressors | |
end | |
""" | |
estimate!(problem::FKRBProblem; gamma = 0.3, lambda = 0.0) | |
Estimates the FKRB model using constrained elastic net. Problem is solved using Convex.jl, and estimated weights | |
are constrained to be nonnegative and sum to 1. Results are stored in problem.results. | |
""" | |
function estimate!(problem::FKRBProblem; gamma = 0.3, lambda = 0.0) | |
data = problem.data; | |
grid_points = problem.grid_points; | |
if isempty(problem.train) | |
train = 1:size(data, 1); | |
else | |
train = problem.train; | |
end | |
# Generate the RHS regressors that will show up in the estimation problem | |
regressors = generate_regressors_aggregate(fkrb_problem, fkrb_problem.grid_points); | |
# Combine into a DataFrame with one outcome and many regressors | |
df_inner = DataFrame(regressors, :auto); | |
df_inner[!,"y"] = data.shares; | |
# Estimate the model | |
# Simple version: OLS | |
# @views w = inv(Matrix(df[!,r"x"])' * Matrix(df[!,r"x"])) * Matrix(df[!,r"x"])' * Matrix(df[!,"y"]) | |
# Constrained elastic net: | |
# Currently for fixed user-provided hyperparameters, but could add cross-validation to choose them | |
@views w_en = elasticnet(df_inner[!,"y"], df_inner[!,r"x"], gamma, lambda) | |
problem.results = w_en; | |
end | |
""" | |
plot_cdfs(problem::FKRBProblem) | |
Plots the estimated CDFs of the coefficients, along with the true CDFs (set manually for each simulation). | |
""" | |
function plot_cdfs(problem::FKRBProblem) | |
# Unpack | |
data = problem.data | |
linear = problem.linear; | |
nonlinear = problem.nonlinear; | |
all_vars = union(linear, nonlinear); | |
w = problem.results[1]; | |
grid_points = problem.grid_points; | |
# Calculate the CDFs | |
unique_grid_points1 = sort(unique(grid_points[:,1])); | |
unique_grid_points2 = sort(unique(grid_points[:,2])); | |
cdf1 = [sum(w[findall(grid_points[:,1] .<= unique_grid_points1[i])]) for i in 1:size(unique_grid_points1, 1)] | |
cdf2 = [sum(w[findall(grid_points[:,2] .<= unique_grid_points2[i])]) for i in 1:size(unique_grid_points2, 1)] | |
# Plot the CDFs of the estimated coefficients | |
plot(unique_grid_points1, cdf1, label = "Est. CDF 1", color = :blue, lw = 1.2) | |
plot!(unique_grid_points2, cdf2, label = "Est. CDF 2", color = :red, lw = 1.2, legend = :outerright) | |
# Plot the true CDFs -- change to simulation specifics | |
plot!(unique_grid_points1, cdf.(Normal(1,0.3), unique_grid_points1), color = :blue, ls = :dash, label = "CDF 1, true") | |
plot!(unique_grid_points2, cdf.(Normal(-1,0.3), unique_grid_points2), color = :red, ls =:dash, label = "CDF 2, true") | |
plot!(xlabel = "β", ylabel = "P(b>β)") | |
end | |
# ---------------------------------------------------------------------- | |
# Simulation example -- two characteristics, both with random coefficients | |
# ---------------------------------------------------------------------- | |
B=10; | |
T = 1000; # number of markets/2 | |
J1 = 2; # number of products in half of markets | |
J2 = 4; # number of products in the other half of markets | |
preference_means = [-1.0 1.0]; # means of the normal distributions of preferences -- first column is price, second is x | |
preference_SDs = [0.3 0.3]; # standard deviations of the normal distributions of preferences | |
df = FRAC.sim_logit_vary_J(J1, J2, T, B, preference_means, preference_SDs, 0, with_market_FEs = true); | |
fkrb_problem = define_problem(method = "FKRB", | |
data = df, linear = ["x", "prices"], | |
nonlinear = ["x", "prices"], | |
iv = ["demand_instruments0"], | |
train = []); | |
estimate!(fkrb_problem, gamma = 0.01, lambda = 1e-6) | |
plot_cdfs(fkrb_problem) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment