Last active
September 24, 2022 06:32
-
-
Save dear983604/7545b0ebb01f0c7ea0e9dd84c5af87b0 to your computer and use it in GitHub Desktop.
Save stock information tutorial by csv & sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 爬取股票資料並存檔" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 爬蟲" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#導入套件\n", | |
"%matplotlib inline\n", | |
"import datetime\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"#專門抓台股的套件\n", | |
"import twstock" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#設定爬蟲股票代號\n", | |
"sid = '2330'\n", | |
"data=twstock.Stock(sid)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 很常遇到這種事,唯一解決方法就是等一段時間\n", | |
"\n", | |
"![拒絕連線](Images/拒絕連線.JPG)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>capacity</th>\n", | |
" <th>turnover</th>\n", | |
" <th>open</th>\n", | |
" <th>high</th>\n", | |
" <th>low</th>\n", | |
" <th>close</th>\n", | |
" <th>change</th>\n", | |
" <th>transaction</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2019-03-28</th>\n", | |
" <td>13998054</td>\n", | |
" <td>3379510718</td>\n", | |
" <td>240.5</td>\n", | |
" <td>242.5</td>\n", | |
" <td>240.0</td>\n", | |
" <td>242.0</td>\n", | |
" <td>0.5</td>\n", | |
" <td>3948</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-03-29</th>\n", | |
" <td>31024810</td>\n", | |
" <td>7568601014</td>\n", | |
" <td>243.0</td>\n", | |
" <td>245.5</td>\n", | |
" <td>240.5</td>\n", | |
" <td>245.5</td>\n", | |
" <td>3.5</td>\n", | |
" <td>6712</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-01</th>\n", | |
" <td>35330656</td>\n", | |
" <td>8758161220</td>\n", | |
" <td>251.0</td>\n", | |
" <td>251.0</td>\n", | |
" <td>245.0</td>\n", | |
" <td>245.5</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13292</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-02</th>\n", | |
" <td>25189053</td>\n", | |
" <td>6226523038</td>\n", | |
" <td>249.5</td>\n", | |
" <td>249.5</td>\n", | |
" <td>246.0</td>\n", | |
" <td>246.0</td>\n", | |
" <td>0.5</td>\n", | |
" <td>9171</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-03</th>\n", | |
" <td>28581323</td>\n", | |
" <td>7075597562</td>\n", | |
" <td>249.0</td>\n", | |
" <td>249.0</td>\n", | |
" <td>246.5</td>\n", | |
" <td>246.5</td>\n", | |
" <td>0.5</td>\n", | |
" <td>8067</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-08</th>\n", | |
" <td>46426821</td>\n", | |
" <td>11682512762</td>\n", | |
" <td>251.0</td>\n", | |
" <td>253.0</td>\n", | |
" <td>250.5</td>\n", | |
" <td>253.0</td>\n", | |
" <td>6.5</td>\n", | |
" <td>16430</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-09</th>\n", | |
" <td>22745674</td>\n", | |
" <td>5760337782</td>\n", | |
" <td>253.0</td>\n", | |
" <td>254.0</td>\n", | |
" <td>252.0</td>\n", | |
" <td>254.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>8323</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-10</th>\n", | |
" <td>33174934</td>\n", | |
" <td>8403528302</td>\n", | |
" <td>253.0</td>\n", | |
" <td>254.5</td>\n", | |
" <td>252.0</td>\n", | |
" <td>254.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>9992</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-11</th>\n", | |
" <td>25146840</td>\n", | |
" <td>6347861680</td>\n", | |
" <td>253.0</td>\n", | |
" <td>254.0</td>\n", | |
" <td>251.5</td>\n", | |
" <td>252.0</td>\n", | |
" <td>-2.0</td>\n", | |
" <td>8284</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2019-04-12</th>\n", | |
" <td>13888148</td>\n", | |
" <td>3496444666</td>\n", | |
" <td>251.5</td>\n", | |
" <td>253.0</td>\n", | |
" <td>251.0</td>\n", | |
" <td>252.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4931</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" capacity turnover open high low close change \\\n", | |
"date \n", | |
"2019-03-28 13998054 3379510718 240.5 242.5 240.0 242.0 0.5 \n", | |
"2019-03-29 31024810 7568601014 243.0 245.5 240.5 245.5 3.5 \n", | |
"2019-04-01 35330656 8758161220 251.0 251.0 245.0 245.5 0.0 \n", | |
"2019-04-02 25189053 6226523038 249.5 249.5 246.0 246.0 0.5 \n", | |
"2019-04-03 28581323 7075597562 249.0 249.0 246.5 246.5 0.5 \n", | |
"2019-04-08 46426821 11682512762 251.0 253.0 250.5 253.0 6.5 \n", | |
"2019-04-09 22745674 5760337782 253.0 254.0 252.0 254.0 1.0 \n", | |
"2019-04-10 33174934 8403528302 253.0 254.5 252.0 254.0 0.0 \n", | |
"2019-04-11 25146840 6347861680 253.0 254.0 251.5 252.0 -2.0 \n", | |
"2019-04-12 13888148 3496444666 251.5 253.0 251.0 252.0 0.0 \n", | |
"\n", | |
" transaction \n", | |
"date \n", | |
"2019-03-28 3948 \n", | |
"2019-03-29 6712 \n", | |
"2019-04-01 13292 \n", | |
"2019-04-02 9171 \n", | |
"2019-04-03 8067 \n", | |
"2019-04-08 16430 \n", | |
"2019-04-09 8323 \n", | |
"2019-04-10 9992 \n", | |
"2019-04-11 8284 \n", | |
"2019-04-12 4931 " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#用fetch_from抓取資料,指定日期放入dataframe裡\n", | |
"df = pd.DataFrame(data.fetch_from(2018,1))\n", | |
"\n", | |
"#設定index\n", | |
"df.set_index('date', inplace = True)\n", | |
"\n", | |
"df.tail(10)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 存檔\n", | |
"#### dataframe裡有兩種常用存檔方式\n", | |
"1. to_csv:存成csv檔\n", | |
"2. to_sql:存在sqlite資料庫裡\n", | |
"\n", | |
" * 先至官網下載sqlite資料庫 https://sqlitebrowser.org/dl/\n", | |
" * Anaconda資料庫已有sqlite套件,若沒有可執行 >>pip install pysqlite3 " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#用to_csv存檔,並命名為 \"股票代號.csv\"\n", | |
"df.to_csv(sid+'.csv')\n", | |
"\n", | |
"#用「utf_8_sig」編碼\n", | |
"#df.to_csv(sid+'_utf.csv', encoding='utf_8_sig')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#用to_sql存檔\n", | |
"import sqlite3\n", | |
"\n", | |
"# 開啟一個名為\"股票代號.db\"的檔案,並與sqlite3的資料庫連結\n", | |
"con = sqlite3.connect(sid+'.db')\n", | |
"\n", | |
"#把df寫入sqlite3裡,參數為(檔名, 資料庫, 取代原始資料)\n", | |
"df.to_sql(sid, con, if_exists='replace')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment