-
-
Save JoostImpink/8ce0af0a0a0bbb31c8e0 to your computer and use it in GitHub Desktop.
/* | |
Earnings management models | |
Author: Joost Impink, March 2016 | |
Models estimated: | |
- Jones model, tac = a0 + a1 1/TAt-1 + a2chSales + a3PPE + a4ROA + error. | |
- variable names DA_Jones ABSDA_Jones | |
- Modified Jones model, as Jones model, but using chSales - chREC to compute fitted values. | |
- variable names DA_mJones ABSDA_mJones | |
- Kothari 2005, controlling for ROA, tac = a0 + a1 1/TAt-1 + a2(chSales - chREC) + a3PPE + a4ROA + error. | |
- variable names DA_Kothari ABSDA_Kothari | |
- Kothari 2005, performance matched, Jones model, difference in discretionary accruals between firm and closest firm in terms of roa | |
- variable names DA_pmKothari ABSDA_pmKothari | |
tac: Total accruals, computed as net profit after tax before extraordinary items | |
less cash flows from operations | |
1/TAt-1: Inverse of beginning of year total assets | |
chSales: Change in net sales revenue | |
chREC: Change in net receivables | |
PPE: Gross property, plant, and equipment | |
ROA: Return on assets. | |
Variables used Compustat Funda | |
AT: Total assets | |
IB: Income Before Extraordinary Items | |
IBC: Income Before Extraordinary Items (Cash Flow) (used if IB is missing) | |
OANCF: Operating Activities - Net Cash Flow | |
PPEGT: Property, Plant and Equipment - Total (Gross) | |
RECT: Receivables - Total | |
SALE: Sales | |
*/ | |
/* Assign directory for output */ | |
%let projectDir = F:/temp/; | |
/* Include %array and %do_over */ | |
filename m1 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/array.sas'; | |
%include m1; | |
filename m2 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/do_over.sas'; | |
%include m2; | |
/* Winsorize macro */ | |
filename m3 url 'https://gist.githubusercontent.com/JoostImpink/497d4852c49d26f164f5/raw/11efba42a13f24f67b5f037e884f4960560a2166/winsorize.sas'; | |
%include m3; | |
/* Get Funda variables */ | |
%let fundaVars = at ib ibc oancf ppegt rect sale ; | |
data da.a_funda (keep = key gvkey fyear datadate sich &fundaVars); | |
set comp.funda; | |
/* Period */ | |
if 2005 <= fyear <= 2014; | |
/* Generic filter */ | |
if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ; | |
/* Firm-year identifier */ | |
key = gvkey || fyear; | |
/* Keep if sale > 0, at > 0 */ | |
if sale > 0 and at > 0; | |
/* Use Income Before Extraordinary Items (Cash Flow) if ib is missing */ | |
if ib =. then ib=ibc; | |
run; | |
/* Lagged values for: at sale rect ib */ | |
%let lagVars = at sale rect ib; | |
/* Self join to get lagged values at_l, sale_l, rect_l */ | |
proc sql; | |
create table da.b_funda as select a.*, %do_over(values=&lagVars, between=comma, phrase=b.? as ?_l) | |
from da.a_funda a, da.a_funda b | |
where a.gvkey = b.gvkey and a.fyear-1 = b.fyear; | |
quit; | |
/* Construct additional variables */ | |
data da.b_funda; | |
set da.b_funda; | |
/* 2-digit SIC */ | |
SIC2 = int(sich/100); | |
/* variables */ | |
tac = (ib - oancf)/at_l; /* alternative: tac = (ib-oancf+xidoc)/at_l */ | |
inv_at_l = 1 / at_l; | |
rev = sale / at_l; | |
drev = (sale - sale_l) / at_l; | |
drevadj = (sale - sale_l)/at_l - (rect - rect_l)/at_l; | |
ppe = ppegt / at_l; | |
roa = ib / at; | |
roa_l = ib_l/ at_l; /* net income before extraordinary items */ | |
/* these variables may not be missing (cmiss counts missing variables)*/ | |
if cmiss (of tac inv_at_l drevadj ppe roa_l) eq 0; | |
run; | |
/* Winsorize */ | |
%let winsVars = tac inv_at_l drev drevadj ppe roa_l ; | |
%winsor(dsetin=da.b_funda, dsetout=da.b_funda_wins, /*byvar=, */ vars=&winsVars, type=winsor, pctl=1 99); | |
/* Regression by industry-year | |
edf + #params (4) will equal the number of obs (no need for proc univariate to count) */ | |
proc sort data=da.b_funda_wins; by fyear sic2;run; | |
proc reg data=da.b_funda_wins noprint edf outest=da.c_parms; | |
model tac = inv_at_l drev ppe; /* Jones Model */ | |
model tac = inv_at_l drevadj ppe roa_l; /* Kothari with ROA in model */ | |
by fyear sic2; | |
run; | |
/* Append discretionary accrual measures */ | |
/* Jones model */ | |
proc sql; | |
create table da.d_model1 as select a.*, b.DA_Jones, abs (b.DA_Jones) as ABSDA_Jones | |
from | |
da.b_funda_wins a left join | |
( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drev ppe, between=%str(+), phrase=a.? * b.?) ) as DA_Jones | |
from da.b_funda_wins a left join da.c_parms b | |
on a.sic2 = b.sic2 and a.fyear = b.fyear | |
/* Model 1 */ | |
and b._MODEL_ eq "MODEL1" | |
/* at a minimum 10 obs (6 degrees of freedom) */ | |
and b._EDF_ > 6 | |
) b | |
on a.key = b.key ; | |
quit; | |
/* Modified Jones model: drev is used in first model, but drevadj is used to compute fitted value */ | |
proc sql; | |
create table da.d_model2 as select a.*, b.DA_mJones, abs (b.DA_mJones) as ABSDA_mJones | |
from | |
da.d_model1 a left join | |
( select a.key, a.tac - ( b.intercept + a.drevadj * b.drev + %do_over(values=inv_at_l ppe, between=%str(+), phrase=a.? * b.?) ) as DA_mJones | |
from da.d_model1 a left join da.c_parms b | |
on a.sic2 = b.sic2 and a.fyear = b.fyear | |
/* Model 1 */ | |
and b._MODEL_ eq "MODEL1" | |
/* at a minimum 10 obs (6 degrees of freedom) */ | |
and b._EDF_ > 6 | |
) b | |
on a.key = b.key ; | |
quit; | |
/* Kothari model (with ROA in regression) */ | |
proc sql; | |
create table da.d_model3 as select a.*, b.DA_Kothari, abs (b.DA_Kothari) as ABSDA_Kothari | |
from | |
da.d_model2 a left join | |
( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drevadj ppe roa_l, between=%str(+), phrase=a.? * b.?) ) as DA_Kothari | |
from da.d_model2 a left join da.c_parms b | |
on a.sic2 = b.sic2 and a.fyear = b.fyear | |
/* Model 2 */ | |
and b._MODEL_ eq "MODEL2" | |
/* at a minimum 10 obs (5 degrees of freedom) */ | |
and b._EDF_ > 5 | |
) b | |
on a.key = b.key ; | |
quit; | |
/* Kothari performance matching: get DA_Jones accruals for closest in ROA */ | |
proc sql; | |
create table da.d_model4 as | |
select a.*, | |
/* gvkey of matched firm */ | |
b.gvkey as gvkey_m, | |
/* difference in ROA */ | |
abs(a.roa - b.roa) as Difference, | |
/* difference in DA_Jones (and absolute difference) */ | |
a.DA_Jones - b.DA_Jones as DA_pmKothari, abs (calculated DA_pmKothari) as ABSDA_pmKothari | |
from da.d_model3 a left join da.d_model3 b | |
on a.fyear = b.fyear and a.sic2 = b.sic2 /* same 2-digit SIC industry-year */ | |
and a.key ne b.key /* not the same firm */ | |
group by a.gvkey, a.fyear | |
having Difference = min(Difference); /* keep best match for size difference */ | |
quit; | |
/* drop possible multiple matches (with the same difference) in previous step */ | |
proc sort data=da.d_model4 nodupkey; by key;run; | |
/* Winsorize discretionary accrual variables */ | |
%let winsVars = DA_Jones DA_mJones DA_Kothari DA_pmKothari ABSDA_Jones ABSDA_mJones ABSDA_Kothari ABSDA_pmKothari ; | |
%winsor(dsetin=da.d_model4, dsetout=da.e_wins, /*byvar=, */ vars=&winsVars, type=winsor, pctl=1 99); | |
/* Means, medians for key variables */ | |
proc means data=da.e_wins n mean median ; | |
var tac inv_at_l drevadj ppe roa_l &winsVars ; | |
run; | |
/* Output dataset */ | |
proc export data = da.e_wins (keep = gvkey fyear datadate sich tac inv_at_l drevadj ppe roa_l &winsVars) | |
outfile = "&projectDir.stata/from sas/absda_march_15.dta" replace; run; |
I am deeply appreciated your sharing!
Small question.
Can we write
proc sql;
create table da.d_modelX as
select distinct a.*,....
quit;
be a better way to avoid the multiple matching?
I am not sure why my da.d_model1, 2, 3 have so many duplications in the end.
Your code saved me so much time. Thank you!!
Your sharing is very kind. However, I have the same problem with hwarraich (commented on Nov 25 2017). I am not sure where to learn the STATA code for performance adjusted model. Any suggestion would be a great help. Thank you very much.
I have the following to prevent the duplicate records:
proc sort data=da.d_model4 nodupkey; by key;run;
SAS is better equipped for these things than Stata, so learning both languages is my suggestion :)
This is absolutely a really helpful program. Huge thanks.
Just want to mention that in the industry-year regression, some papers use leave-one-out sample. So it might be better to use a Jacknife method to correct for this small bias afterwards.
Still, it's really appreciated!
I've been trying to figure out how to run the performance adjusted model in Stata. So far, I haven't been successful. Any ideas? Any help will be greatly appreciated.