JoostImpink · March 30, 2024 04:49 · hwarraich · Nov 24, 2017 · hcchuang · Dec 1, 2017
diff --git a/earnings_management_models.sas b/earnings_management_models.sas
 /*

 	Earnings management models

 	Author: Joost Impink, March 2016

 	Models estimated:
 	- Jones model, 	tac = a0 + a1 1/TAt-1 + a2chSales + a3PPE + a4ROA + error.
 		- variable names DA_Jones ABSDA_Jones 
 	- Modified Jones model, as Jones model, but using chSales - chREC to compute fitted values.
 		- variable names DA_mJones ABSDA_mJones 
 	- Kothari 2005, controlling for ROA, tac = a0 + a1 1/TAt-1 + a2(chSales - chREC) + a3PPE + a4ROA + error.
 		- variable names DA_Kothari ABSDA_Kothari  
 	- Kothari 2005, performance matched, Jones model, difference in discretionary accruals between firm and closest firm in terms of roa
 		- variable names DA_pmKothari ABSDA_pmKothari  

 	tac:		Total accruals, computed as net profit after tax before extraordinary items 
 				less cash flows from operations	
 	1/TAt-1:	Inverse of beginning of year total assets
 	chSales:	Change in net sales revenue
 	chREC: 		Change in net receivables
 	PPE:		Gross property, plant, and equipment
 	ROA:		Return on assets. 

 	Variables used Compustat Funda
 	AT:		Total assets
 	IB: 	Income Before Extraordinary Items
 	IBC: 	Income Before Extraordinary Items (Cash Flow) (used if IB is missing)
 	OANCF: 	Operating Activities - Net Cash Flow
 	PPEGT:	Property, Plant and Equipment - Total (Gross)
 	RECT: 	Receivables - Total
 	SALE:	Sales
 */

 /* Assign directory for output */
 %let projectDir = F:/temp/;

 /* Include %array and %do_over */
 filename m1 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/array.sas';
 %include m1;
 filename m2 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/do_over.sas';
 %include m2;

 /* Winsorize macro */
 filename m3 url 'https://gist.githubusercontent.com/JoostImpink/497d4852c49d26f164f5/raw/11efba42a13f24f67b5f037e884f4960560a2166/winsorize.sas';
 %include m3;

 /* Get Funda variables */
 %let fundaVars = at ib ibc oancf ppegt rect sale ;

 data da.a_funda (keep = key gvkey fyear datadate sich &fundaVars);
 set comp.funda;
 /* Period */
 if 2005 <= fyear <= 2014;
 /* Generic filter */
 if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ;
 /* Firm-year identifier */
 key = gvkey || fyear;
 /* Keep if sale > 0, at > 0 */
 if sale > 0 and at > 0;
 /* Use Income Before Extraordinary Items (Cash Flow) if ib is missing */
 if ib =. then ib=ibc;
 run;

 /* Lagged values for: at sale rect ib */
 %let lagVars = at sale rect ib;

 /* Self join to get lagged values at_l, sale_l, rect_l */
 proc sql;
 create table da.b_funda as select a.*, %do_over(values=&lagVars, between=comma, phrase=b.? as ?_l)
 from da.a_funda a, da.a_funda b
 where a.gvkey = b.gvkey and a.fyear-1 = b.fyear;
 quit;

 /* Construct additional variables */
 data da.b_funda;
 set da.b_funda;
 /* 2-digit SIC  */
 SIC2 = int(sich/100);
 /* variables */
 tac       = (ib - oancf)/at_l;  /* alternative: tac        = (ib-oancf+xidoc)/at_l */
 inv_at_l  = 1 / at_l;
 rev       = sale / at_l;
 drev      = (sale - sale_l) / at_l;
 drevadj   = (sale - sale_l)/at_l - (rect - rect_l)/at_l;
 ppe       = ppegt / at_l;
 roa 	  = ib / at;
 roa_l 	  = ib_l/ at_l; /* net income before extraordinary items */
 /* these variables may not be missing (cmiss counts missing variables)*/
 if cmiss  (of tac inv_at_l drevadj ppe roa_l) eq 0;
 run;

 /* Winsorize  */
 %let winsVars = tac inv_at_l drev drevadj ppe roa_l  ; 
 %winsor(dsetin=da.b_funda, dsetout=da.b_funda_wins, /*byvar=, */ vars=&winsVars, type=winsor, pctl=1 99);

 /* Regression by industry-year 
 edf + #params (4) will equal the number of obs (no need for proc univariate to count) */
 proc sort data=da.b_funda_wins; by fyear sic2;run;
 proc reg data=da.b_funda_wins noprint edf outest=da.c_parms;
 model tac = inv_at_l drev ppe; /* Jones Model */
 model tac = inv_at_l drevadj ppe roa_l; /* Kothari with ROA in model */     
 by fyear sic2;
 run;

 /* Append discretionary accrual measures */

 /* Jones model */
 proc sql;
 create table da.d_model1 as select a.*, b.DA_Jones, abs (b.DA_Jones) as ABSDA_Jones	
 	from 
 		da.b_funda_wins a left join 
 			( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drev ppe, between=%str(+), phrase=a.? * b.?) ) as DA_Jones
 			 from da.b_funda_wins a left join da.c_parms b
 			 on a.sic2 = b.sic2 and a.fyear = b.fyear
 			 /* Model 1 */
 			 and b._MODEL_ eq "MODEL1"
 			 /* at a minimum 10 obs (6 degrees of freedom) */
 			 and b._EDF_ > 6    
 	     ) b
 		on a.key = b.key ;
 quit;

 /* Modified Jones model: drev is used in first model, but drevadj is used to compute fitted value */
 proc sql;
 create table da.d_model2 as select a.*, b.DA_mJones, abs (b.DA_mJones) as ABSDA_mJones	
 	from 
 		da.d_model1 a left join 
 			( select a.key, a.tac - ( b.intercept + a.drevadj * b.drev + %do_over(values=inv_at_l ppe, between=%str(+), phrase=a.? * b.?) ) as DA_mJones
 			 from da.d_model1 a left join da.c_parms b
 			 on a.sic2 = b.sic2 and a.fyear = b.fyear
 			 /* Model 1 */
 			 and b._MODEL_ eq "MODEL1"
 			 /* at a minimum 10 obs (6 degrees of freedom) */
 			 and b._EDF_ > 6    
 	     ) b
 		on a.key = b.key ;
 quit;

 /* Kothari model (with ROA in regression) */
 proc sql;
 create table da.d_model3 as select a.*, b.DA_Kothari, abs (b.DA_Kothari) as ABSDA_Kothari	
 	from 
 		da.d_model2 a left join 
 			( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drevadj ppe roa_l, between=%str(+), phrase=a.? * b.?) ) as DA_Kothari
 			 from da.d_model2 a left join da.c_parms b
 			 on a.sic2 = b.sic2 and a.fyear = b.fyear
 			 /* Model 2 */
 			 and b._MODEL_ eq "MODEL2"
 			 /* at a minimum 10 obs (5 degrees of freedom) */
 			 and b._EDF_ > 5    
 	     ) b
 		on a.key = b.key ;
 quit;

 /*	Kothari performance matching: get DA_Jones accruals for closest in ROA */
 proc sql;
 	create table da.d_model4 as
 		select a.*,
 		/* gvkey of matched firm */
 		b.gvkey as gvkey_m, 
 		/* difference in ROA */
 		abs(a.roa - b.roa) as Difference, 
 		/* difference in DA_Jones (and absolute difference) */
 		a.DA_Jones - b.DA_Jones as DA_pmKothari, abs (calculated DA_pmKothari) as ABSDA_pmKothari
 		from da.d_model3 a left join  da.d_model3  b
 		on a.fyear = b.fyear and a.sic2 = b.sic2 /* same 2-digit SIC industry-year */		
 		and a.key ne b.key /* not the same firm */
 		group by a.gvkey, a.fyear
 		having Difference = min(Difference); /* keep best match for size difference */		
 quit;

 /* drop possible multiple matches (with the same difference) in previous step */
 proc sort data=da.d_model4 nodupkey; by key;run;

 /* Winsorize discretionary accrual variables  */
 %let winsVars = DA_Jones DA_mJones DA_Kothari DA_pmKothari ABSDA_Jones  ABSDA_mJones  ABSDA_Kothari ABSDA_pmKothari  ; 
 %winsor(dsetin=da.d_model4, dsetout=da.e_wins, /*byvar=, */ vars=&winsVars, type=winsor, pctl=1 99);

 /* Means, medians for key variables */
 proc means data=da.e_wins n mean median ;
 var tac inv_at_l drevadj ppe roa_l &winsVars ;
 run; 

 /*	Output dataset  */
 proc export data = da.e_wins  (keep = gvkey fyear datadate sich tac inv_at_l drevadj ppe roa_l &winsVars) 
 outfile = "&projectDir.stata/from sas/absda_march_15.dta" replace; run;
	/*

	Earnings management models

	Author: Joost Impink, March 2016

	Models estimated:
	- Jones model, tac = a0 + a1 1/TAt-1 + a2chSales + a3PPE + a4ROA + error.
	- variable names DA_Jones ABSDA_Jones
	- Modified Jones model, as Jones model, but using chSales - chREC to compute fitted values.
	- variable names DA_mJones ABSDA_mJones
	- Kothari 2005, controlling for ROA, tac = a0 + a1 1/TAt-1 + a2(chSales - chREC) + a3PPE + a4ROA + error.
	- variable names DA_Kothari ABSDA_Kothari
	- Kothari 2005, performance matched, Jones model, difference in discretionary accruals between firm and closest firm in terms of roa
	- variable names DA_pmKothari ABSDA_pmKothari

	tac: Total accruals, computed as net profit after tax before extraordinary items
	less cash flows from operations
	1/TAt-1: Inverse of beginning of year total assets
	chSales: Change in net sales revenue
	chREC: Change in net receivables
	PPE: Gross property, plant, and equipment
	ROA: Return on assets.

	Variables used Compustat Funda
	AT: Total assets
	IB: Income Before Extraordinary Items
	IBC: Income Before Extraordinary Items (Cash Flow) (used if IB is missing)
	OANCF: Operating Activities - Net Cash Flow
	PPEGT: Property, Plant and Equipment - Total (Gross)
	RECT: Receivables - Total
	SALE: Sales
	*/

	/* Assign directory for output */
	%let projectDir = F:/temp/;

	/* Include %array and %do_over */
	filename m1 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/array.sas';
	%include m1;
	filename m2 url 'https://gist.githubusercontent.com/JoostImpink/c22197c93ecd27bbf7ef/raw/2e2a54825c9dbfdfd66cfc94b9abe05e9d1f1a8e/do_over.sas';
	%include m2;

	/* Winsorize macro */
	filename m3 url 'https://gist.githubusercontent.com/JoostImpink/497d4852c49d26f164f5/raw/11efba42a13f24f67b5f037e884f4960560a2166/winsorize.sas';
	%include m3;

	/* Get Funda variables */
	%let fundaVars = at ib ibc oancf ppegt rect sale ;

	data da.a_funda (keep = key gvkey fyear datadate sich &fundaVars);
	set comp.funda;
	/* Period */
	if 2005 <= fyear <= 2014;
	/* Generic filter */
	if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ;
	/* Firm-year identifier */
	key = gvkey \|\| fyear;
	/* Keep if sale > 0, at > 0 */
	if sale > 0 and at > 0;
	/* Use Income Before Extraordinary Items (Cash Flow) if ib is missing */
	if ib =. then ib=ibc;
	run;

	/* Lagged values for: at sale rect ib */
	%let lagVars = at sale rect ib;

	/* Self join to get lagged values at_l, sale_l, rect_l */
	proc sql;
	create table da.b_funda as select a.*, %do_over(values=&lagVars, between=comma, phrase=b.? as ?_l)
	from da.a_funda a, da.a_funda b
	where a.gvkey = b.gvkey and a.fyear-1 = b.fyear;
	quit;

	/* Construct additional variables */
	data da.b_funda;
	set da.b_funda;
	/* 2-digit SIC */
	SIC2 = int(sich/100);
	/* variables */
	tac = (ib - oancf)/at_l; /* alternative: tac = (ib-oancf+xidoc)/at_l */
	inv_at_l = 1 / at_l;
	rev = sale / at_l;
	drev = (sale - sale_l) / at_l;
	drevadj = (sale - sale_l)/at_l - (rect - rect_l)/at_l;
	ppe = ppegt / at_l;
	roa = ib / at;
	roa_l = ib_l/ at_l; /* net income before extraordinary items */
	/* these variables may not be missing (cmiss counts missing variables)*/
	if cmiss (of tac inv_at_l drevadj ppe roa_l) eq 0;
	run;

	/* Winsorize */
	%let winsVars = tac inv_at_l drev drevadj ppe roa_l ;
	%winsor(dsetin=da.b_funda, dsetout=da.b_funda_wins, /byvar=, / vars=&winsVars, type=winsor, pctl=1 99);

	/* Regression by industry-year
	edf + #params (4) will equal the number of obs (no need for proc univariate to count) */
	proc sort data=da.b_funda_wins; by fyear sic2;run;
	proc reg data=da.b_funda_wins noprint edf outest=da.c_parms;
	model tac = inv_at_l drev ppe; /* Jones Model */
	model tac = inv_at_l drevadj ppe roa_l; /* Kothari with ROA in model */
	by fyear sic2;
	run;

	/* Append discretionary accrual measures */

	/* Jones model */
	proc sql;
	create table da.d_model1 as select a.*, b.DA_Jones, abs (b.DA_Jones) as ABSDA_Jones
	from
	da.b_funda_wins a left join
	( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drev ppe, between=%str(+), phrase=a.? * b.?) ) as DA_Jones
	from da.b_funda_wins a left join da.c_parms b
	on a.sic2 = b.sic2 and a.fyear = b.fyear
	/* Model 1 */
	and b._MODEL_ eq "MODEL1"
	/* at a minimum 10 obs (6 degrees of freedom) */
	and b._EDF_ > 6
	) b
	on a.key = b.key ;
	quit;

	/* Modified Jones model: drev is used in first model, but drevadj is used to compute fitted value */
	proc sql;
	create table da.d_model2 as select a.*, b.DA_mJones, abs (b.DA_mJones) as ABSDA_mJones
	from
	da.d_model1 a left join
	( select a.key, a.tac - ( b.intercept + a.drevadj * b.drev + %do_over(values=inv_at_l ppe, between=%str(+), phrase=a.? * b.?) ) as DA_mJones
	from da.d_model1 a left join da.c_parms b
	on a.sic2 = b.sic2 and a.fyear = b.fyear
	/* Model 1 */
	and b._MODEL_ eq "MODEL1"
	/* at a minimum 10 obs (6 degrees of freedom) */
	and b._EDF_ > 6
	) b
	on a.key = b.key ;
	quit;

	/* Kothari model (with ROA in regression) */
	proc sql;
	create table da.d_model3 as select a.*, b.DA_Kothari, abs (b.DA_Kothari) as ABSDA_Kothari
	from
	da.d_model2 a left join
	( select a.key, a.tac - ( b.intercept + %do_over(values=inv_at_l drevadj ppe roa_l, between=%str(+), phrase=a.? * b.?) ) as DA_Kothari
	from da.d_model2 a left join da.c_parms b
	on a.sic2 = b.sic2 and a.fyear = b.fyear
	/* Model 2 */
	and b._MODEL_ eq "MODEL2"
	/* at a minimum 10 obs (5 degrees of freedom) */
	and b._EDF_ > 5
	) b
	on a.key = b.key ;
	quit;

	/* Kothari performance matching: get DA_Jones accruals for closest in ROA */
	proc sql;
	create table da.d_model4 as
	select a.*,
	/* gvkey of matched firm */
	b.gvkey as gvkey_m,
	/* difference in ROA */
	abs(a.roa - b.roa) as Difference,
	/* difference in DA_Jones (and absolute difference) */
	a.DA_Jones - b.DA_Jones as DA_pmKothari, abs (calculated DA_pmKothari) as ABSDA_pmKothari
	from da.d_model3 a left join da.d_model3 b
	on a.fyear = b.fyear and a.sic2 = b.sic2 /* same 2-digit SIC industry-year */
	and a.key ne b.key /* not the same firm */
	group by a.gvkey, a.fyear
	having Difference = min(Difference); /* keep best match for size difference */
	quit;

	/* drop possible multiple matches (with the same difference) in previous step */
	proc sort data=da.d_model4 nodupkey; by key;run;

	/* Winsorize discretionary accrual variables */
	%let winsVars = DA_Jones DA_mJones DA_Kothari DA_pmKothari ABSDA_Jones ABSDA_mJones ABSDA_Kothari ABSDA_pmKothari ;
	%winsor(dsetin=da.d_model4, dsetout=da.e_wins, /byvar=, / vars=&winsVars, type=winsor, pctl=1 99);

	/* Means, medians for key variables */
	proc means data=da.e_wins n mean median ;
	var tac inv_at_l drevadj ppe roa_l &winsVars ;
	run;

	/* Output dataset */
	proc export data = da.e_wins (keep = gvkey fyear datadate sich tac inv_at_l drevadj ppe roa_l &winsVars)
	outfile = "&projectDir.stata/from sas/absda_march_15.dta" replace; run;