Created
September 10, 2018 20:37
-
-
Save edwinhu/abdb84baa3e246051f9210a156926ea9 to your computer and use it in GitHub Desktop.
Get all COMPUSTAT names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* The goal of this code is to identify all gvkeys in Compustat, | |
and to estimate approximate start/end dates for these gvkeys */ | |
proc sql; | |
/* All NA Companies */ | |
create table comp_name_range as | |
select * | |
from | |
/* Company files contains all NA gvkeys */ | |
(select gvkey, ein, cik, | |
UPCASE(COMPBL(PRXCHANGE('s/[\W_]+/ /',-1,conm))) as conm, | |
ipodate format yymmdd10., | |
dldte format yymmdd10., | |
costat, | |
fic, state | |
from comp.company) a | |
left join | |
/* Note that there is overlap | |
between names and bank_names | |
but the dates need not be the same | |
as the data can come from different sources | |
(i.e., the INDFMT) */ | |
(select gvkey, cik, | |
min(year1) as fund_date_min format yymmdd10., | |
min(year2) as fund_date_max format yymmdd10. | |
from | |
(select gvkey, cik, year1, year2 | |
from comp.names | |
union all | |
select gvkey, cik, year1, year2 | |
from comp.bank_names) | |
group by gvkey) b | |
on a.gvkey=b.gvkey | |
left join | |
/* Some companies don't have fundamentals, | |
but do have registered securities and hence | |
they still have Compustat gvkeys | |
(e.g., private firms) | |
NA Security History file dates do not appear | |
to be accurate, hence I am using the dates | |
from the namesm file */ | |
(select distinct gvkey, cik, | |
year1 as sec_date_min format yymmdd10., | |
year2 as sec_date_max format yymmdd10. | |
from comp.namesm) c | |
on a.gvkey = c.gvkey | |
; | |
/* All Compustat Global Companies */ | |
create table g_comp_name_range as | |
select * | |
from | |
/* Global Company file is analogous | |
to NA Company file */ | |
(select gvkey, ein, cik, | |
UPCASE(COMPBL(PRXCHANGE('s/[\W_]+/ /',-1,conm))) as conm, | |
ipodate format yymmdd10., | |
dldte format yymmdd10., | |
costat, | |
fic, state | |
from comp.g_company) a | |
left join | |
/* Note: there is no Global Bank file */ | |
(select gvkey, | |
year1 as fund_date_min format yymmdd10., | |
year2 as fund_date_max format yymmdd10. | |
from comp.g_names) b | |
on a.gvkey=b.gvkey | |
left join | |
/* Note: there is no (sec)namesm file | |
for Compustat Global, but the Security History file | |
also has dates */ | |
(select distinct gvkey, | |
min(effdate) as sec_date_min format yymmdd10., | |
max(thrudate) as sec_date_max format yymmdd10. | |
from comp.g_sec_history | |
group by gvkey) c | |
on a.gvkey = c.gvkey | |
; | |
/* Combine the two, accounting for overlaps */ | |
create table comp_name_range_all as | |
select *, | |
/* Get the earliest date for which there is Compustat data | |
this may also be the IPO Date or Deletion Date */ | |
smallest(1, ipodate, fund_date_min, sec_date_min) as comp_date_min format yymmdd10., | |
largest(1, dldte, fund_date_max, sec_date_max) as comp_date_max format yymmdd10. | |
from | |
/* Some firms show up in both NA and Global */ | |
(select distinct gvkey, ein, cik, | |
min(ipodate) as ipodate format yymmdd10., | |
max(dldte) as dldte format yymmdd10., | |
min(fund_date_min) as fund_date_min format yymmdd10., | |
max(fund_date_max) as fund_date_max format yymmdd10., | |
min(sec_date_min) as sec_date_min format yymmdd10., | |
max(sec_date_max) as sec_date_max format yymmdd10., | |
conm, costat, | |
fic as country, state | |
from | |
(select * | |
from comp_name_range | |
union all | |
select * | |
from g_comp_name_range) | |
group by gvkey) | |
order by gvkey | |
; | |
quit; | |
/* There shouldn't be duplicates unless | |
a firm shows up in both NA and Global, | |
and the company metadata (e.g., name | |
or status) is different */ | |
proc sql; | |
create table _dup as | |
select * from comp_name_range_all | |
group by gvkey | |
having count(*) >= 2 | |
; | |
quit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment