Created
January 19, 2014 01:40
-
-
Save dougvk/8499335 to your computer and use it in GitHub Desktop.
(stock ticker -> CIK) dictionary using SEC EDGAR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a {ticker: CIK} dictionary by scraping the SEC EDGAR company search
# page for each ticker, then pickle the result to the file 'cik_dict'.
import re
from cPickle import dump
from requests import get

DEFAULT_TICKERS = ['goog', 'aapl']
URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
# Captures the 10-digit, zero-padded CIK that follows "CIK=" in the page body.
CIK_RE = re.compile(r'.*CIK=(\d{10}).*')

cik_dict = {}
for ticker in DEFAULT_TICKERS:
    results = CIK_RE.findall(get(URL.format(ticker)).content)
    # Tickers whose page contains no CIK are skipped rather than stored empty.
    if results:
        cik_dict[str(ticker).lower()] = str(results[0])

# Pickle data should be written in binary mode; the context manager closes
# the file even if dump() raises.
with open('cik_dict', 'wb') as f:
    dump(cik_dict, f)
Thank you everyone, your comments and code were a huge help.
for 3.6:
import re, requests
def getCIKs(TICKERS):
    """Look up the SEC EDGAR CIK for each ticker symbol.

    Fetches the EDGAR company search page for every ticker and extracts the
    first 10-digit CIK found in the response text.

    Args:
        TICKERS: iterable of ticker symbols (e.g. ``['wmt', 'amzn']``).

    Returns:
        dict mapping the upper-cased ticker to its CIK as a string with the
        leading zero padding removed. Tickers with no CIK in the response
        are omitted. The dictionary is also printed.
    """
    URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
    CIK_RE = re.compile(r'.*CIK=(\d{10}).*')
    cik_dict = {}
    for ticker in TICKERS:
        # .text reads the whole body, so streaming the response buys nothing.
        response = requests.get(URL.format(ticker))
        match = CIK_RE.search(response.text)
        if match:
            # int() strips the zero padding from the 10-digit CIK.
            cik_dict[str(ticker).upper()] = str(int(match.group(1)))
    # The previous open('cik_dict', 'w') wrote nothing and only truncated the
    # file, so it has been dropped.
    print(cik_dict)
    return cik_dict
# Example call; performs live HTTP requests to www.sec.gov.
getCIKs(['wmt','amzn','nflx'])
# prints:
# {'WMT': '104169', 'AMZN': '1018724', 'NFLX': '1065280'}
@Micah81 Using `return cik_dict` instead of `print(cik_dict)` worked better for me.
Today you have to set a user agent:
import re, requests
# Sent with every request; per the note accompanying this snippet, a user
# agent now has to be set for the lookup to work.
headers = {"user-agent": "Safari"}


def getCIKs(TICKERS):
    """Look up the SEC EDGAR CIK for each ticker symbol.

    Fetches the EDGAR company search page for every ticker, sending the
    module-level ``headers``, and extracts the first 10-digit CIK found in
    the response text.

    Args:
        TICKERS: iterable of ticker symbols (e.g. ``['wmt', 'amzn']``).

    Returns:
        dict mapping the upper-cased ticker to its CIK as a string with the
        leading zero padding removed. Tickers with no CIK in the response
        are omitted.
    """
    URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
    CIK_RE = re.compile(r'.*CIK=(\d{10}).*')
    cik_dict = {}
    for ticker in TICKERS:
        # .text reads the whole body, so streaming the response buys nothing.
        response = requests.get(URL.format(ticker), headers=headers)
        match = CIK_RE.search(response.text)
        if match:
            # int() strips the zero padding from the 10-digit CIK.
            cik_dict[str(ticker).upper()] = str(int(match.group(1)))
    # The previous open('cik_dict', 'w') wrote nothing and only truncated the
    # file, so it has been dropped.
    return cik_dict
# Example call; performs live HTTP requests to www.sec.gov.
getCIKs(['wmt','amzn','nflx'])
# returns:
# {'WMT': '104169', 'AMZN': '1018724', 'NFLX': '1065280'}
It fails today because the SEC has changed its website.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I want to convert this into a dataframe and it keeps spitting out the following
How do I rename my columns?
I tried
df = df.rename(columns = {"0": "CIK"})
but that didn't work. Here is the version of the code that I used:
# Scrape the SEC EDGAR CIK for each ticker and show the result as a
# one-column DataFrame indexed by ticker.
import re
from _pickle import dump
import requests
import pandas as pd

DEFAULT_TICKERS = ['goog', 'aapl']
URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
# The '*' quantifiers were lost when the snippet was pasted as markdown;
# restored to match the pattern used by the other versions of this script.
CIK_RE = re.compile(r'.*CIK=(\d{10}).*')

cik_dict = {}
for ticker in DEFAULT_TICKERS:
    response = requests.get(URL.format(ticker))
    results = CIK_RE.findall(response.text)
    # Tickers whose page contains no CIK are skipped.
    if results:
        cik_dict[str(ticker).lower()] = str(results[0])
print(cik_dict)

# With orient='index' and scalar values the single column is labelled with
# the integer 0, so rename(columns={"0": "CIK"}) (string key) matched
# nothing. Naming the column at construction avoids the rename entirely.
df = pd.DataFrame.from_dict(cik_dict, orient='index', columns=['CIK'])
print(df)