Created
January 19, 2014 01:40
-
-
Save dougvk/8499335 to your computer and use it in GitHub Desktop.
(stock ticker -> CIK) dictionary using SEC EDGAR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a (stock ticker -> CIK) dictionary by scraping SEC EDGAR company
# pages, then pickle it to the file 'cik_dict' in the working directory.
import re

try:
    from cPickle import dump  # Python 2: C-accelerated pickle
except ImportError:
    from pickle import dump  # Python 3: cPickle no longer exists

from requests import get

DEFAULT_TICKERS = ['goog', 'aapl']
URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
# Captures the 10-digit CIK that follows "CIK=" in the returned page.
CIK_RE = re.compile(r'.*CIK=(\d{10}).*')

cik_dict = {}
for ticker in DEFAULT_TICKERS:
    # Match against the decoded body: on Python 3 `.content` is bytes and a
    # str pattern cannot be applied to it.
    results = CIK_RE.findall(get(URL.format(ticker)).text)
    if results:
        # Keep the first match, zero padding included, keyed by lower-case ticker.
        cik_dict[str(ticker).lower()] = str(results[0])

# Pickle output is binary data, so open in 'wb'; the context manager closes
# the file even if dump() raises.
with open('cik_dict', 'wb') as f:
    dump(cik_dict, f)
Today you have to set a user agent:
import re, requests
# Request headers passed to every EDGAR query made by getCIKs; a User-Agent is
# set because SEC EDGAR reportedly rejects requests that lack one.
headers = {"user-agent": "Safari"}
def getCIKs(TICKERS):
    """Look up the SEC EDGAR CIK number for each ticker symbol.

    For every ticker, the EDGAR company-browse page is requested (using the
    module-level ``headers``) and the first match of the ``CIK=<10 digits>``
    pattern in the response text is taken as that company's CIK.

    Args:
        TICKERS: iterable of ticker symbols, e.g. ``['wmt', 'amzn']``.

    Returns:
        dict mapping each upper-cased ticker to its CIK as a string with the
        leading zero padding removed. Tickers whose page yields no match are
        left out of the result.
    """
    URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
    CIK_RE = re.compile(r'.*CIK=(\d{10}).*')
    cik_dict = {}
    for ticker in TICKERS:
        response = requests.get(URL.format(ticker), headers=headers)
        results = CIK_RE.findall(response.text)
        if results:
            # The capture is exactly ten digits; int() drops the zero padding
            # before the value is stored back as a string.
            cik_dict[str(ticker).upper()] = str(int(results[0]))
    return cik_dict
# Example lookup for three tickers.
getCIKs(["wmt", "amzn", "nflx"])
# Result at the time of writing:
# {'WMT': '104169', 'AMZN': '1018724', 'NFLX': '1065280'}
It fails today because the SEC has since changed the website.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Micah81 using `return(cik_dict)` instead of `print(cik_dict)` worked better for me.