Last active
May 29, 2017 22:43
-
-
Save shotahorii/e662577acc61b859d8680825f55c8be3 to your computer and use it in GitHub Desktop.
Fundamental data from SEC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Fundamental data from SEC\n", | |
"- Data source (SEC) : https://www.sec.gov/dera/data/financial-statement-data-sets.html\n", | |
"- Documentation: https://www.sec.gov/data/financial-statements/aqfs.pdf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Load data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# sub data\n", | |
"with open('./data/sec/2017q1/sub.txt', 'rb') as f:\n", | |
" sub = pd.read_csv(f, sep='\\t', header=0, encoding='ISO-8859-1')\n", | |
" \n", | |
"# num data\n", | |
"with open('./data/sec/2017q1/num.txt', 'rb') as f:\n", | |
" num = pd.read_csv(f, sep='\\t', header=0, encoding='ISO-8859-1')\n", | |
" \n", | |
"# tag data\n", | |
"with open('./data/sec/2017q1/tag.txt', 'rb') as f:\n", | |
" tag = pd.read_csv(f, sep='\\t', header=0, encoding='ISO-8859-1')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"#### Quick data check" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(6270, 36)\n", | |
"(1927179, 9)\n", | |
"(81519, 9)\n" | |
] | |
} | |
], | |
"source": [ | |
"# check the data size\n", | |
"print(sub.shape)\n", | |
"print(num.shape)\n", | |
"print(tag.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-------- sub --------\n", | |
"adsh 0\n", | |
"cik 0\n", | |
"name 0\n", | |
"sic 10\n", | |
"countryba 11\n", | |
"stprba 476\n", | |
"cityba 11\n", | |
"zipba 14\n", | |
"bas1 12\n", | |
"bas2 3586\n", | |
"baph 15\n", | |
"countryma 57\n", | |
"stprma 500\n", | |
"cityma 57\n", | |
"zipma 58\n", | |
"mas1 58\n", | |
"mas2 3636\n", | |
"countryinc 573\n", | |
"stprinc 795\n", | |
"ein 0\n", | |
"former 3019\n", | |
"changed 3019\n", | |
"afs 26\n", | |
"wksi 0\n", | |
"fye 1\n", | |
"form 0\n", | |
"period 0\n", | |
"fy 48\n", | |
"fp 54\n", | |
"filed 0\n", | |
"accepted 0\n", | |
"prevrpt 0\n", | |
"detail 0\n", | |
"instance 0\n", | |
"nciks 0\n", | |
"aciks 6149\n", | |
"dtype: int64\n", | |
"-------- num --------\n", | |
"adsh 0\n", | |
"tag 0\n", | |
"version 0\n", | |
"coreg 1666658\n", | |
"ddate 0\n", | |
"qtrs 0\n", | |
"uom 0\n", | |
"value 39906\n", | |
"footnote 1927179\n", | |
"dtype: int64\n", | |
"-------- tag --------\n", | |
"tag 1\n", | |
"version 0\n", | |
"custom 0\n", | |
"abstract 0\n", | |
"datatype 4321\n", | |
"iord 13150\n", | |
"crdr 21631\n", | |
"tlabel 22\n", | |
"doc 6895\n", | |
"dtype: int64\n" | |
] | |
} | |
], | |
"source": [ | |
"# check null\n", | |
"print(\"-------- sub --------\")\n", | |
"print(sub.isnull().sum())\n", | |
"print(\"-------- num --------\")\n", | |
"print(num.isnull().sum())\n", | |
"print(\"-------- tag --------\")\n", | |
"print(tag.isnull().sum())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Data quick look" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>adsh</th>\n", | |
" <th>cik</th>\n", | |
" <th>name</th>\n", | |
" <th>sic</th>\n", | |
" <th>countryba</th>\n", | |
" <th>stprba</th>\n", | |
" <th>cityba</th>\n", | |
" <th>zipba</th>\n", | |
" <th>bas1</th>\n", | |
" <th>bas2</th>\n", | |
" <th>...</th>\n", | |
" <th>period</th>\n", | |
" <th>fy</th>\n", | |
" <th>fp</th>\n", | |
" <th>filed</th>\n", | |
" <th>accepted</th>\n", | |
" <th>prevrpt</th>\n", | |
" <th>detail</th>\n", | |
" <th>instance</th>\n", | |
" <th>nciks</th>\n", | |
" <th>aciks</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0000002178-17-000019</td>\n", | |
" <td>2178</td>\n", | |
" <td>ADAMS RESOURCES & ENERGY, INC.</td>\n", | |
" <td>5172</td>\n", | |
" <td>US</td>\n", | |
" <td>TX</td>\n", | |
" <td>HOUSTON</td>\n", | |
" <td>77027</td>\n", | |
" <td>17 S. BRIAR HOLLOW LN.</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>20161231</td>\n", | |
" <td>2016</td>\n", | |
" <td>FY</td>\n", | |
" <td>20170331</td>\n", | |
" <td>2017-03-31 12:28:00.0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>ae-20161231.xml</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0000002488-17-000043</td>\n", | |
" <td>2488</td>\n", | |
" <td>ADVANCED MICRO DEVICES INC</td>\n", | |
" <td>3674</td>\n", | |
" <td>US</td>\n", | |
" <td>CA</td>\n", | |
" <td>SUNNYVALE</td>\n", | |
" <td>94085</td>\n", | |
" <td>ONE AMD PL</td>\n", | |
" <td>MS 68</td>\n", | |
" <td>...</td>\n", | |
" <td>20161231</td>\n", | |
" <td>2016</td>\n", | |
" <td>FY</td>\n", | |
" <td>20170221</td>\n", | |
" <td>2017-02-21 16:52:00.0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>amd-20161231.xml</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0000002969-17-000010</td>\n", | |
" <td>2969</td>\n", | |
" <td>AIR PRODUCTS & CHEMICALS INC /DE/</td>\n", | |
" <td>2810</td>\n", | |
" <td>US</td>\n", | |
" <td>PA</td>\n", | |
" <td>ALLENTOWN</td>\n", | |
" <td>18195-1501</td>\n", | |
" <td>7201 HAMILTON BLVD</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>20161231</td>\n", | |
" <td>2017</td>\n", | |
" <td>Q1</td>\n", | |
" <td>20170127</td>\n", | |
" <td>2017-01-27 14:02:00.0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>apd-20161231.xml</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>3 rows × 36 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" adsh cik name sic \\\n", | |
"0 0000002178-17-000019 2178 ADAMS RESOURCES & ENERGY, INC. 5172 \n", | |
"1 0000002488-17-000043 2488 ADVANCED MICRO DEVICES INC 3674 \n", | |
"2 0000002969-17-000010 2969 AIR PRODUCTS & CHEMICALS INC /DE/ 2810 \n", | |
"\n", | |
" countryba stprba cityba zipba bas1 bas2 \\\n", | |
"0 US TX HOUSTON 77027 17 S. BRIAR HOLLOW LN. NaN \n", | |
"1 US CA SUNNYVALE 94085 ONE AMD PL MS 68 \n", | |
"2 US PA ALLENTOWN 18195-1501 7201 HAMILTON BLVD NaN \n", | |
"\n", | |
" ... period fy fp filed accepted prevrpt detail \\\n", | |
"0 ... 20161231 2016 FY 20170331 2017-03-31 12:28:00.0 0 1 \n", | |
"1 ... 20161231 2016 FY 20170221 2017-02-21 16:52:00.0 0 1 \n", | |
"2 ... 20161231 2017 Q1 20170127 2017-01-27 14:02:00.0 0 1 \n", | |
"\n", | |
" instance nciks aciks \n", | |
"0 ae-20161231.xml 1 NaN \n", | |
"1 amd-20161231.xml 1 NaN \n", | |
"2 apd-20161231.xml 1 NaN \n", | |
"\n", | |
"[3 rows x 36 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sub.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>adsh</th>\n", | |
" <th>tag</th>\n", | |
" <th>version</th>\n", | |
" <th>coreg</th>\n", | |
" <th>ddate</th>\n", | |
" <th>qtrs</th>\n", | |
" <th>uom</th>\n", | |
" <th>value</th>\n", | |
" <th>footnote</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0001477932-16-014447</td>\n", | |
" <td>EntityCommonStockSharesOutstanding</td>\n", | |
" <td>dei/2014</td>\n", | |
" <td>NaN</td>\n", | |
" <td>20161231</td>\n", | |
" <td>0</td>\n", | |
" <td>shares</td>\n", | |
" <td>67538142</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0001477932-16-014447</td>\n", | |
" <td>AccountsAndOtherReceivablesNetCurrent</td>\n", | |
" <td>us-gaap/2016</td>\n", | |
" <td>NaN</td>\n", | |
" <td>20151231</td>\n", | |
" <td>0</td>\n", | |
" <td>USD</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0001477932-16-014447</td>\n", | |
" <td>AccountsAndOtherReceivablesNetCurrent</td>\n", | |
" <td>us-gaap/2016</td>\n", | |
" <td>NaN</td>\n", | |
" <td>20160930</td>\n", | |
" <td>0</td>\n", | |
" <td>USD</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" adsh tag version \\\n", | |
"0 0001477932-16-014447 EntityCommonStockSharesOutstanding dei/2014 \n", | |
"1 0001477932-16-014447 AccountsAndOtherReceivablesNetCurrent us-gaap/2016 \n", | |
"2 0001477932-16-014447 AccountsAndOtherReceivablesNetCurrent us-gaap/2016 \n", | |
"\n", | |
" coreg ddate qtrs uom value footnote \n", | |
"0 NaN 20161231 0 shares 67538142 NaN \n", | |
"1 NaN 20151231 0 USD NaN NaN \n", | |
"2 NaN 20160930 0 USD NaN NaN " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"num.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tag</th>\n", | |
" <th>version</th>\n", | |
" <th>custom</th>\n", | |
" <th>abstract</th>\n", | |
" <th>datatype</th>\n", | |
" <th>iord</th>\n", | |
" <th>crdr</th>\n", | |
" <th>tlabel</th>\n", | |
" <th>doc</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>AccountsPayableCurrent</td>\n", | |
" <td>us-gaap/2015</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>monetary</td>\n", | |
" <td>I</td>\n", | |
" <td>C</td>\n", | |
" <td>Accounts Payable, Current</td>\n", | |
" <td>Carrying value as of the balance sheet date of...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>AccountsPayableRelatedPartiesCurrent</td>\n", | |
" <td>us-gaap/2015</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>monetary</td>\n", | |
" <td>I</td>\n", | |
" <td>C</td>\n", | |
" <td>Accounts Payable, Related Parties, Current</td>\n", | |
" <td>Amount for accounts payable to related parties...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>AccountsReceivableNetCurrent</td>\n", | |
" <td>us-gaap/2015</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>monetary</td>\n", | |
" <td>I</td>\n", | |
" <td>D</td>\n", | |
" <td>Accounts Receivable, Net, Current</td>\n", | |
" <td>Amount due from customers or clients, within o...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tag version custom abstract \\\n", | |
"0 AccountsPayableCurrent us-gaap/2015 0 0 \n", | |
"1 AccountsPayableRelatedPartiesCurrent us-gaap/2015 0 0 \n", | |
"2 AccountsReceivableNetCurrent us-gaap/2015 0 0 \n", | |
"\n", | |
" datatype iord crdr tlabel \\\n", | |
"0 monetary I C Accounts Payable, Current \n", | |
"1 monetary I C Accounts Payable, Related Parties, Current \n", | |
"2 monetary I D Accounts Receivable, Net, Current \n", | |
"\n", | |
" doc \n", | |
"0 Carrying value as of the balance sheet date of... \n", | |
"1 Amount for accounts payable to related parties... \n", | |
"2 Amount due from customers or clients, within o... " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tag.head(3)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment