Skip to content

Instantly share code, notes, and snippets.

@fo40225
Created May 16, 2022 13:06
Show Gist options
  • Save fo40225/d397f43f56ddecaf3c80b76ce704f2c9 to your computer and use it in GitHub Desktop.
Save fo40225/d397f43f56ddecaf3c80b76ce704f2c9 to your computer and use it in GitHub Desktop.
證交所股淨比
import re
import urllib.request
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
datas = pd.DataFrame(columns=[
'year',
'month',
'cap',
'vol',
'idx',
'turnover',
'pe',
'dividend_yield',
'pbr'
])
for y in reversed(range(1999,2023)):
for m in reversed(range(1,13)):
strdate = f'{y}{str(m).zfill(2)}'
url = f'https://www.twse.com.tw/statistics/count?url=/staticFiles/inspection/inspection/02/001/{strdate}_C02001.zip'
response = urllib.request.urlopen(url)
b = BytesIO(response.read())
try:
ZipFile(b)
except:
continue
with ZipFile(b) as my_zip_file:
for contained_file in my_zip_file.namelist():
b = my_zip_file.open(contained_file)
if ( y==1999 and m < 3 ):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=62, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif ( y==1999 and m < 4 ):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=62, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif ( y==1999 and m < 5 ):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=60, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif ( y==1999 and m < 6 ):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=61, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif ( y==1999 and m < 7 ):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=63, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif y < 2000 or (y == 2000 and m < 1) :
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=64, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif y < 2000 or (y == 2000 and m < 2):
df = pd.read_excel(b, header=None, dtype=str, skiprows=29, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=65, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif y < 2000 or (y == 2000 and m < 5):
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1)
df1 = df
df = pd.read_excel(b, header=None, dtype=str, skiprows=66, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif y < 2000 or (y == 2000 and m < 10):
df = pd.read_excel(b, header=None, dtype=str, skiprows=28, nrows=1)
df1 = df
df = pd.read_excel(b, sheet_name=1, header=None, dtype=str, skiprows=26, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
elif y < 2003 or (y == 2003 and m < 6):
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1)
df1 = df
df = pd.read_excel(b, sheet_name=1, header=None, dtype=str, skiprows=28, nrows=1)
df2 = df
# display(df1)
# display(df2)
cap = df1[5]
vol = df1[7]
idx = df1[9]
turnover = df2[1]
pe = df2[2]
dividend_yield = df2[3]
pbr = df2[4]
else:
df = pd.read_excel(b, header=None, dtype=str, skiprows=30, nrows=1)
# display(df)
cap = df[5]
vol = df[7]
idx = df[9]
turnover = df[10]
pe = df[11]
dividend_yield = df[12]
pbr = df[13]
pattern = r"[^\d.]"
data = pd.DataFrame([[
str(y),
str(m),
re.sub(pattern,'',cap[0]),
re.sub(pattern,'',vol[0]),
re.sub(pattern,'',idx[0]),
re.sub(pattern,'',turnover[0]),
re.sub(pattern,'',pe[0]),
re.sub(pattern,'',dividend_yield[0]),
re.sub(pattern,'',pbr[0]),
]],columns=[
'year',
'month',
'cap',
'vol',
'idx',
'turnover',
'pe',
'dividend_yield',
'pbr'
])
datas = pd.concat([datas, data], ignore_index=True)
datas["date"] = datas["year"] + "-" + datas['month']
datas = datas[::-1]
plt.figure(figsize=(18, 9))
plt.plot(datas['date'],datas['pbr'].astype(float))
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment