Created
May 16, 2022 13:06
-
-
Save fo40225/d397f43f56ddecaf3c80b76ce704f2c9 to your computer and use it in GitHub Desktop.
證交所股淨比
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import urllib.request | |
import pandas as pd | |
from io import BytesIO | |
from zipfile import ZipFile | |
from IPython.display import display | |
import matplotlib.pyplot as plt | |
import numpy as np | |
datas = pd.DataFrame(columns=[ | |
'year', | |
'month', | |
'cap', | |
'vol', | |
'idx', | |
'turnover', | |
'pe', | |
'dividend_yield', | |
'pbr' | |
]) | |
for y in reversed(range(1999,2023)): | |
for m in reversed(range(1,13)): | |
strdate = f'{y}{str(m).zfill(2)}' | |
url = f'https://www.twse.com.tw/statistics/count?url=/staticFiles/inspection/inspection/02/001/{strdate}_C02001.zip' | |
response = urllib.request.urlopen(url) | |
b = BytesIO(response.read()) | |
try: | |
ZipFile(b) | |
except: | |
continue | |
with ZipFile(b) as my_zip_file: | |
for contained_file in my_zip_file.namelist(): | |
b = my_zip_file.open(contained_file) | |
if ( y==1999 and m < 3 ): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=62, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif ( y==1999 and m < 4 ): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=62, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif ( y==1999 and m < 5 ): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=60, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif ( y==1999 and m < 6 ): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=61, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif ( y==1999 and m < 7 ): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=63, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif y < 2000 or (y == 2000 and m < 1) : | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=64, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif y < 2000 or (y == 2000 and m < 2): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=65, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif y < 2000 or (y == 2000 and m < 5): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=66, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif y < 2000 or (y == 2000 and m < 10): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=28, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, sheet_name=1, header=None, dtype=str, skiprows=26, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
elif y < 2003 or (y == 2003 and m < 6): | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1) | |
df1 = df | |
df = pd.read_excel(b, sheet_name=1, header=None, dtype=str, skiprows=28, nrows=1) | |
df2 = df | |
# display(df1) | |
# display(df2) | |
cap = df1[5] | |
vol = df1[7] | |
idx = df1[9] | |
turnover = df2[1] | |
pe = df2[2] | |
dividend_yield = df2[3] | |
pbr = df2[4] | |
else: | |
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1) | |
# display(df) | |
cap = df[5] | |
vol = df[7] | |
idx = df[9] | |
turnover = df[10] | |
pe = df[11] | |
dividend_yield = df[12] | |
pbr = df[13] | |
pattern = r"[^\d.]" | |
data = pd.DataFrame([[ | |
str(y), | |
str(m), | |
re.sub(pattern,'',cap[0]), | |
re.sub(pattern,'',vol[0]), | |
re.sub(pattern,'',idx[0]), | |
re.sub(pattern,'',turnover[0]), | |
re.sub(pattern,'',pe[0]), | |
re.sub(pattern,'',dividend_yield[0]), | |
re.sub(pattern,'',pbr[0]), | |
]],columns=[ | |
'year', | |
'month', | |
'cap', | |
'vol', | |
'idx', | |
'turnover', | |
'pe', | |
'dividend_yield', | |
'pbr' | |
]) | |
datas = pd.concat([datas, data], ignore_index=True) | |
datas["date"] = datas["year"] + "-" + datas['month'] | |
datas = datas[::-1] | |
plt.figure(figsize=(18, 9)) | |
plt.plot(datas['date'],datas['pbr'].astype(float)) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment