Created
January 10, 2016 12:09
-
-
Save FinanceData/d2020557e84c69dd77e2 to your computer and use it in GitHub Desktop.
세종데이터 요약재무 (년)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 세종데이터 요약제무 (년)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"페이지 이름으로 주재부제표, IFRS연결, IFRS별도를 구분 (table_main0_bus_01.html)\n", | |
"* http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?no=005930 주재무제표(GAAP)\n", | |
"* http://www.sejongdata.com/business_include_fr/table_main1_bus_01.html?no=005930 K-IFRS(연결)\n", | |
"* http://www.sejongdata.com/business_include_fr/table_main2_bus_01.html?no=005930 K-IFRS(별도)\n", | |
"\n", | |
"\n", | |
"gubun 값으로 년도를 구분(1: 1995~2004, 3: 2005~2014)\n", | |
"* 1995~2004 'http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?gubun=1&no=005930'\n", | |
"* 2005~2014 'http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?gubun=2&no=005930'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"import requests\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from pandas_datareader import data, wb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"host = 'http://www.sejongdata.com/business_include_fr/'\n", | |
" \n", | |
"# 주재무제표 (GAAP)\n", | |
"config_gaap = { \n", | |
" 'url_tmpl': host + 'table_main0_bus_01.html?no=%s&gubun=%s',\n", | |
" 'data_dir': 'data/sj_finstate_year_gaap/'\n", | |
"}\n", | |
"\n", | |
"# K-IFRS(연결)\n", | |
"config_ifrs_con = {\n", | |
" 'url_tmpl': host + 'table_main1_bus_01.html?no=%s&gubun=%s',\n", | |
" 'data_dir': 'data/sj_finstate_year_ifrs_con/'\n", | |
"}\n", | |
"\n", | |
"# K-IFRS(별도)\n", | |
"config_ifrs_sep = {\n", | |
" 'url_tmpl': host + 'table_main2_bus_01.html?no=%s&gubun=%s',\n", | |
" 'data_dir': 'data/sj_finstate_year_ifrs_sep/'\n", | |
"}\n", | |
"\n", | |
"# 아래 3라인중 하나를 선택하여 사용 (comment out)\n", | |
"config = config_gaap # 주재무제표 (GAAP)\n", | |
"config = config_ifrs_con # K-IFRS(연결)\n", | |
"config = config_ifrs_sep # K-IFRS(별도)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"def mkdir_not_ex(filename):\n", | |
" folder=os.path.dirname(filename)\n", | |
" if not os.path.exists(folder):\n", | |
" os.makedirs(folder)\n", | |
" \n", | |
"data_dir = config['data_dir']\n", | |
"mkdir_not_ex(data_dir)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"url_tmpl = config['url_tmpl']\n", | |
"\n", | |
"def get_finstat_year(code):\n", | |
" df_1 = pd.read_html(url_tmpl % (code, '1'))[1]\n", | |
" df_2 = pd.read_html(url_tmpl % (code, '2'))[1]\n", | |
"\n", | |
" df_1 = df_1.T\n", | |
" df_2 = df_2.T\n", | |
" \n", | |
" # 컬럼 이름 지정\n", | |
" cols = ['날짜', '매출액', '영업이익', '순이익', '연결순이익', '자산총계', '부채총계', '자본총계']\n", | |
" df_1.columns = cols \n", | |
" df_2.columns = cols\n", | |
" \n", | |
" # 첫번째 row 제거\n", | |
" df_1 = df_1[1:]\n", | |
" df_2 = df_2[1:] \n", | |
" \n", | |
" # df_1, df_2를 붙여서 새로운 df 생성 \n", | |
" df = df_1.append(df_2)\n", | |
"\n", | |
" # df['년도']\n", | |
" # \"2014.12 (IFRS 연결)\" to \"2014-12\"\n", | |
" df['종류'] = df['날짜'].apply(lambda x: x.split(' (')[1].replace(')', ''))\n", | |
" df['날짜'] = df['날짜'].apply(lambda x: x.split(' ')[0].replace('.', '-'))\n", | |
" df['날짜'] = pd.to_datetime(df['날짜'])\n", | |
"\n", | |
" # 타입을 object 에서 float 로 변환\n", | |
" cols = ['매출액', '영업이익', '순이익', '연결순이익', '자산총계', '부채총계', '자본총계']\n", | |
" try:\n", | |
" df[cols] = df[cols].astype(float) \n", | |
" except:\n", | |
" pass\n", | |
"\n", | |
" df = df.set_index('날짜')\n", | |
" return df\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>매출액</th>\n", | |
" <th>영업이익</th>\n", | |
" <th>순이익</th>\n", | |
" <th>연결순이익</th>\n", | |
" <th>자산총계</th>\n", | |
" <th>부채총계</th>\n", | |
" <th>자본총계</th>\n", | |
" <th>종류</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>날짜</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1996-12-01</th>\n", | |
" <td>158745</td>\n", | |
" <td>14468</td>\n", | |
" <td>1642</td>\n", | |
" <td>-</td>\n", | |
" <td>158385</td>\n", | |
" <td>108130</td>\n", | |
" <td>50255</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1997-12-01</th>\n", | |
" <td>184654</td>\n", | |
" <td>28562</td>\n", | |
" <td>1260</td>\n", | |
" <td>-</td>\n", | |
" <td>230655</td>\n", | |
" <td>172356</td>\n", | |
" <td>58299</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1998-12-01</th>\n", | |
" <td>200842</td>\n", | |
" <td>31000</td>\n", | |
" <td>3216</td>\n", | |
" <td>-</td>\n", | |
" <td>207761</td>\n", | |
" <td>138061</td>\n", | |
" <td>69700</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1999-12-01</th>\n", | |
" <td>261178</td>\n", | |
" <td>44815</td>\n", | |
" <td>31704</td>\n", | |
" <td>-</td>\n", | |
" <td>247098</td>\n", | |
" <td>113782</td>\n", | |
" <td>133316</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2000-12-01</th>\n", | |
" <td>342838</td>\n", | |
" <td>74352</td>\n", | |
" <td>60145</td>\n", | |
" <td>-</td>\n", | |
" <td>268950</td>\n", | |
" <td>107022</td>\n", | |
" <td>161928</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2001-12-01</th>\n", | |
" <td>323804</td>\n", | |
" <td>22953</td>\n", | |
" <td>29469</td>\n", | |
" <td>-</td>\n", | |
" <td>279194</td>\n", | |
" <td>84457</td>\n", | |
" <td>194737</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2002-12-01</th>\n", | |
" <td>405116</td>\n", | |
" <td>72447</td>\n", | |
" <td>70518</td>\n", | |
" <td>-</td>\n", | |
" <td>344396</td>\n", | |
" <td>101293</td>\n", | |
" <td>243103</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2003-12-01</th>\n", | |
" <td>435820</td>\n", | |
" <td>71927</td>\n", | |
" <td>59590</td>\n", | |
" <td>-</td>\n", | |
" <td>392034</td>\n", | |
" <td>97889</td>\n", | |
" <td>294145</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2004-12-01</th>\n", | |
" <td>576324</td>\n", | |
" <td>120169</td>\n", | |
" <td>107867</td>\n", | |
" <td>-</td>\n", | |
" <td>438165</td>\n", | |
" <td>93761</td>\n", | |
" <td>344404</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2005-12-01</th>\n", | |
" <td>574577</td>\n", | |
" <td>80598</td>\n", | |
" <td>76402</td>\n", | |
" <td>-</td>\n", | |
" <td>505388</td>\n", | |
" <td>108822</td>\n", | |
" <td>396566</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2006-12-01</th>\n", | |
" <td>589728</td>\n", | |
" <td>69339</td>\n", | |
" <td>79261</td>\n", | |
" <td>-</td>\n", | |
" <td>578091</td>\n", | |
" <td>125485</td>\n", | |
" <td>452606</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2007-12-01</th>\n", | |
" <td>631760</td>\n", | |
" <td>59429</td>\n", | |
" <td>74250</td>\n", | |
" <td>-</td>\n", | |
" <td>652253</td>\n", | |
" <td>136646</td>\n", | |
" <td>515606</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-12-01</th>\n", | |
" <td>729530</td>\n", | |
" <td>41341</td>\n", | |
" <td>55259</td>\n", | |
" <td>-</td>\n", | |
" <td>725192</td>\n", | |
" <td>144057</td>\n", | |
" <td>581135</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2009-12-01</th>\n", | |
" <td>897728</td>\n", | |
" <td>63485</td>\n", | |
" <td>96495</td>\n", | |
" <td>-</td>\n", | |
" <td>860242</td>\n", | |
" <td>191994</td>\n", | |
" <td>668247</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-12-01</th>\n", | |
" <td>1122495</td>\n", | |
" <td>149241</td>\n", | |
" <td>132365</td>\n", | |
" <td>-</td>\n", | |
" <td>1071790</td>\n", | |
" <td>272118</td>\n", | |
" <td>799672</td>\n", | |
" <td>IFRS 별도</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2011-12-01</th>\n", | |
" <td>1208160</td>\n", | |
" <td>117017</td>\n", | |
" <td>100292</td>\n", | |
" <td>-</td>\n", | |
" <td>1174576</td>\n", | |
" <td>285551</td>\n", | |
" <td>889025</td>\n", | |
" <td>IFRS 별도</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-12-01</th>\n", | |
" <td>1412064</td>\n", | |
" <td>185104</td>\n", | |
" <td>173985</td>\n", | |
" <td>-</td>\n", | |
" <td>1332641</td>\n", | |
" <td>273762</td>\n", | |
" <td>1058879</td>\n", | |
" <td>IFRS 별도</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2013-12-01</th>\n", | |
" <td>1583721</td>\n", | |
" <td>218070</td>\n", | |
" <td>179295</td>\n", | |
" <td>-</td>\n", | |
" <td>1548260</td>\n", | |
" <td>324552</td>\n", | |
" <td>1223708</td>\n", | |
" <td>IFRS 별도</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2014-12-01</th>\n", | |
" <td>1378255</td>\n", | |
" <td>139250</td>\n", | |
" <td>145918</td>\n", | |
" <td>-</td>\n", | |
" <td>1640606</td>\n", | |
" <td>313835</td>\n", | |
" <td>1326771</td>\n", | |
" <td>IFRS 별도</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2015-12-01</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>GAAP 개별</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 매출액 영업이익 순이익 연결순이익 자산총계 부채총계 자본총계 종류\n", | |
"날짜 \n", | |
"1996-12-01 158745 14468 1642 - 158385 108130 50255 GAAP 개별\n", | |
"1997-12-01 184654 28562 1260 - 230655 172356 58299 GAAP 개별\n", | |
"1998-12-01 200842 31000 3216 - 207761 138061 69700 GAAP 개별\n", | |
"1999-12-01 261178 44815 31704 - 247098 113782 133316 GAAP 개별\n", | |
"2000-12-01 342838 74352 60145 - 268950 107022 161928 GAAP 개별\n", | |
"2001-12-01 323804 22953 29469 - 279194 84457 194737 GAAP 개별\n", | |
"2002-12-01 405116 72447 70518 - 344396 101293 243103 GAAP 개별\n", | |
"2003-12-01 435820 71927 59590 - 392034 97889 294145 GAAP 개별\n", | |
"2004-12-01 576324 120169 107867 - 438165 93761 344404 GAAP 개별\n", | |
"2005-12-01 574577 80598 76402 - 505388 108822 396566 GAAP 개별\n", | |
"2006-12-01 589728 69339 79261 - 578091 125485 452606 GAAP 개별\n", | |
"2007-12-01 631760 59429 74250 - 652253 136646 515606 GAAP 개별\n", | |
"2008-12-01 729530 41341 55259 - 725192 144057 581135 GAAP 개별\n", | |
"2009-12-01 897728 63485 96495 - 860242 191994 668247 GAAP 개별\n", | |
"2010-12-01 1122495 149241 132365 - 1071790 272118 799672 IFRS 별도\n", | |
"2011-12-01 1208160 117017 100292 - 1174576 285551 889025 IFRS 별도\n", | |
"2012-12-01 1412064 185104 173985 - 1332641 273762 1058879 IFRS 별도\n", | |
"2013-12-01 1583721 218070 179295 - 1548260 324552 1223708 IFRS 별도\n", | |
"2014-12-01 1378255 139250 145918 - 1640606 313835 1326771 IFRS 별도\n", | |
"2015-12-01 NaN NaN NaN NaN NaN NaN NaN GAAP 개별" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 테스트\n", | |
"code = '005930'\n", | |
"fname = code + '.csv'\n", | |
"df = get_finstat_year(code)\n", | |
"df.to_csv(fname)\n", | |
"df = pd.read_csv(fname, index_col='날짜')\n", | |
"os.remove(fname)\n", | |
"\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 종목 전체\n", | |
"* https://gist.githubusercontent.com/plusjune/f3edace16e89c1a9a156\n", | |
"\n", | |
"\n", | |
"참고 사항\n", | |
"* BadStatusLine 에러가 난다면 다시 시행 \n", | |
"* 만들어진 파일은 SKIP하므로 여러번 다시 시행해도 상관없음" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import os.path\n", | |
"\n", | |
"master_df = pd.read_csv('https://goo.gl/i3nW13', index_col='종목코드')\n", | |
"\n", | |
"for code, row in master_df.iterrows():\n", | |
" fname = code + '.csv'\n", | |
" if os.path.isfile(config['data_dir'] + fname):\n", | |
" print (code, row['종목명'], fname, '파일 존재 SKIP')\n", | |
" else:\n", | |
" print (code, row['종목명'], fname)\n", | |
" df = get_finstat_year(code)\n", | |
" df.to_csv(config['data_dir'] + fname)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment