Last active
January 10, 2021 18:41
-
-
Save darribas/7805381 to your computer and use it in GitHub Desktop.
Fixed-Effects panel OLS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Fixed-effects OLS Panel models | |
Author: Dani Arribas-Bel <@darribas> | |
Copyright (c) 2013, Dani Arribas-Bel | |
All rights reserved. | |
LICENSE | |
------- | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions are met: | |
* Redistributions of source code must retain the above copyright notice, this | |
list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright | |
notice, this list of conditions and the following disclaimer in the | |
documentation and/or other materials provided with the distribution. | |
* The name of the author may not be used to endorse or | |
promote products derived from this software without specific prior written | |
permission. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | |
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | |
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
AND | |
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | |
IN | |
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
POSSIBILITY OF SUCH DAMAGE. | |
''' | |
import pandas as pd | |
import pysal as ps | |
import numpy as np | |
from pysal.spreg.diagnostics import condition_index as ci | |
def one_way_fe(y, x, fe1, robust=None, name_y=None, name_x=None, model_name=None): | |
''' | |
One way Fixed-Effect panel | |
... | |
Arguments | |
--------- | |
y : ndarray | |
Flat array or list with dependent variable | |
x : ndarray | |
Array with exogenous variables | |
fe1 : ndarray/list | |
Sequence with memberships to the Fixed effect (e.g. | |
individiual or neighborhood) | |
robust : str | |
Argument for PySAL OLS. If None (default), standard OLS | |
inference is performed. If 'white', white | |
heteroskedasticity is applied. | |
name_y : str | |
[Optional] Name of dependent variable | |
name_x : list | |
[Optional] Name of exogenous variables | |
model_name : str | |
[Optional] Name of model | |
Returns | |
------- | |
m : ps.spreg.BaseOLS | |
Barebones PySAL OLS model object | |
''' | |
yx = pd.DataFrame(x) | |
yx['y'] = y | |
yx['fe1'] = fe1 | |
demeaner1 = yx.groupby('fe1').mean() | |
demeaner1 = yx[['fe1']].join(demeaner1, on='fe1')\ | |
.drop(['fe1'], axis=1) | |
yx = yx.drop(['fe1'], axis=1) | |
yxd = yx - demeaner1 | |
m = ps.spreg.BaseOLS(yxd[['y']].values, yxd.drop('y', axis=1).values, \ | |
robust=robust) | |
if name_y: | |
m.name_y = name_y | |
else: | |
m.name_y = 'y' | |
if name_x: | |
m.name_x = name_x | |
else: | |
m.name_x = ['X'+str(i) for i in range(x.shape[1])] | |
from pysal.spreg.diagnostics import t_stat, r2 | |
m.t_stat = t_stat(m) | |
m.r2 = r2(m) | |
m.model_name = m.name_y | |
m.fe = 'FE-1' | |
m.fe2 = None | |
return m | |
def two_way_fe(y, x, fe1, fe2, robust=None, name_y=None, name_x=None, model_name=None): | |
''' | |
Two ways Fixed-Effect panel | |
... | |
Arguments | |
--------- | |
y : ndarray | |
Flat array or list with dependent variable | |
x : ndarray | |
Array with exogenous variables | |
fe1 : ndarray/list | |
Sequence with memberships to the first Fixed effect (e.g. | |
individiual or neighborhood) | |
fe2 : ndarray/list | |
Sequence with memberships to the second Fixed effect (e.g. | |
time) | |
robust : str | |
Argument for PySAL OLS. If None (default), standard OLS | |
inference is performed. If 'white', white | |
heteroskedasticity is applied. | |
name_y : str | |
[Optional] Name of dependent variable | |
name_x : list | |
[Optional] Name of exogenous variables | |
model_name : str | |
[Optional] Name of model | |
Returns | |
------- | |
m : ps.spreg.BaseOLS | |
Barebones PySAL OLS model object | |
''' | |
yx = pd.DataFrame(x) | |
yx['y'] = y | |
yx['fe1'] = fe1 | |
yx['fe2'] = fe2 | |
demeaner1 = yx.drop('fe2', axis=1).groupby('fe1').mean() | |
demeaner1 = yx[['fe1']].join(demeaner1, on='fe1')\ | |
.drop(['fe1'], axis=1) | |
demeaner2 = yx.drop('fe1', axis=1).groupby('fe2').mean() | |
demeaner2 = yx[['fe2']].join(demeaner2, on='fe2')\ | |
.drop(['fe2'], axis=1) | |
yx = yx.drop(['fe1', 'fe2'], axis=1) | |
yxd = yx - demeaner1 - demeaner2 + yx.mean() | |
m = ps.spreg.BaseOLS(yxd[['y']].values, yxd.drop('y', axis=1).values, \ | |
robust=robust) | |
if name_y: | |
m.name_y = name_y | |
else: | |
m.name_y = 'y' | |
if name_x: | |
m.name_x = name_x | |
else: | |
m.name_x = ['X'+str(i) for i in range(x.shape[1])] | |
from pysal.spreg.diagnostics import t_stat, r2 | |
m.t_stat = t_stat(m) | |
m.r2 = r2(m) | |
m.model_name = m.name_y | |
m.fe = 'FE-1' | |
m.fe2 = 'FE-2' | |
return m | |
if __name__ == '__main__': | |
# DGP | |
n = 1000 | |
d1 = 10 | |
d2 = 100 | |
seed = np.random.seed(1234) | |
x = np.random.random((n, 3)) | |
# Unbalanced panel | |
fe1 = np.random.random_integers(0, d1, n).astype(str) | |
fe2 = np.random.random_integers(0, d2, n).astype(str) | |
# Balanced panel | |
d2 = int(n * 1. / d1) | |
fe1 = np.arange(d1) | |
fe2 = np.arange(d2) | |
fe = np.array([(i, j) for i in fe1 for j in fe2]) | |
fe1 = fe[:, 0] | |
fe2 = fe[:, 1] | |
# | |
fe1_d = pd.get_dummies(fe1, 'fe1') | |
fe2_d = pd.get_dummies(fe2, 'fe2') | |
e = np.random.normal(0, 1, (n, 1)) | |
y = np.dot(x, np.array([[1., 1., 1.]]).T) \ | |
+ fe1_d.ix[:, :-1].values.sum(axis=1)[:, None] \ | |
+ e | |
#+ fe2_d.values.sum(axis=1)[:, None] \ | |
# One-way | |
m = one_way_fe(y, x, fe1) | |
xone = np.hstack((x, fe1_d.values)) | |
md = ps.spreg.BaseOLS(y, xone) | |
print "De-meaned | Dummy" | |
for i in range(3): | |
print np.round(m.betas[i][0], 9), " | ", \ | |
np.round(md.betas[i][0], 9) | |
print 'Condition index' | |
print ci(m), " | ", ci(md) | |
# Two-way | |
m = two_way_fe(y, x, fe1, fe2) | |
malt = one_way_fe(y, xone[:, :-1], fe2) | |
xtwo = np.hstack((xone, fe2_d.values[:, 1:])) | |
md = ps.spreg.BaseOLS(y, xtwo) | |
print "\nDe-meaned | 1 demeaned-1 dummy | Dummy" | |
for i in range(3): | |
print np.round(m.betas[i][0], 9), " | ", \ | |
np.round(malt.betas[i][0], 9), " | ", \ | |
np.round(md.betas[i][0], 9) | |
print 'Condition index' | |
print ci(m), " | ",ci(malt), " | ", ci(md) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello @MarlaWillemse, I'm not sure that sounds more like a numpy/scipy message.
More generally, this code is a bit old, for user access to panel econometrics, I'd suggest
statsmodels
, which I believe now support these methods:https://www.statsmodels.org/stable/index.html