Skip to content

Instantly share code, notes, and snippets.

@InJeCTrL
Last active December 29, 2019 04:01
Show Gist options
  • Select an option

  • Save InJeCTrL/90bb88c376205cc2cbca998bd032e1a0 to your computer and use it in GitHub Desktop.

Select an option

Save InJeCTrL/90bb88c376205cc2cbca998bd032e1a0 to your computer and use it in GitHub Desktop.
上证50成分股逐窗口协方差计算
import numpy as np
import pandas as pd
import os
def CalcCovList(code_stock, tbl_IH, tbl_stock, W = 20, n = 1, Unit = 1):
    '''
    Compute the sliding-window covariance between the close prices of one
    constituent stock and the close prices of the futures contract.

    code_stock: stock code, only used in the progress messages
    tbl_IH: futures quotes; must contain the columns 'Unnamed: 0' (time key)
            and 'close'
    tbl_stock: constituent-stock quotes with the same two columns
    W: window size (number of minutes, i.e. rows, in each window)
    n: number of minutes (rows) the futures window lags the stock window
    Unit: number of rows the window slides each step; it is also the
          sampling stride inside a window
    Return: time-covariance DataFrame, indexed by 'Unnamed: 0' with a single
            column 'COV'; empty when the inputs do not overlap or are too
            short for one window
    '''
    def empty_result():
        # Same layout as a normal result, just without rows
        empty = pd.DataFrame([], columns = ['Unnamed: 0', 'COV'])
        return empty.set_index('Unnamed: 0')

    if len(tbl_IH) == 0 or len(tbl_stock) == 0:
        return empty_result()
    # Aligned stock quotes: drop rows earlier than the first futures row.
    # .iloc[0] is positional, so a non-default index on the input is fine.
    tbl_S = tbl_stock[tbl_stock['Unnamed: 0'] >= tbl_IH['Unnamed: 0'].iloc[0]]
    tbl_S = tbl_S.reset_index(drop = True)
    if len(tbl_S) == 0:
        return empty_result()
    # Aligned futures quotes: drop rows earlier than the first kept stock row
    tbl_I = tbl_IH[tbl_IH['Unnamed: 0'] >= tbl_S['Unnamed: 0'].iloc[0]]
    tbl_I = tbl_I.reset_index(drop = True)
    # Plain arrays avoid building a small DataFrame for every window
    time_S = tbl_S['Unnamed: 0'].values
    close_S = tbl_S['close'].values
    close_I = tbl_I['close'].values
    # Time-covariance list
    COVXY = []
    # Number of window start positions available in both tables
    num_wnd = min(tbl_S.shape[0] - W, tbl_I.shape[0] - W - n)
    starts = range(0, num_wnd, Unit)
    # Real number of iterations (differs from num_wnd when Unit > 1)
    total = len(starts)
    # Next progress percentage to report
    next_mark = 5
    print('开始计算[ ' + code_stock + ' ]')
    # Slide the window over both tables
    for done, i_wnd in enumerate(starts, start = 1):
        # Row positions of the stock window; the futures window is the same
        # set of positions shifted by n rows
        idx_S = np.arange(i_wnd + Unit - 1, i_wnd + W, Unit)
        idx_I = idx_S + n
        # Covariance of X and Y (COV(X,Y)) is the off-diagonal entry
        cov_xy = np.cov(close_S[idx_S], close_I[idx_I])[0][1]
        # The result is labelled with the time of the row right after the window
        COVXY.append([time_S[i_wnd + W], cov_xy])
        # Integer arithmetic: a float modulo test skips steps due to rounding
        percent = done * 100 // total
        if percent >= next_mark:
            reached = percent - percent % 5
            print(str(float(reached)) + '%')
            next_mark = reached + 5
    tbl_COVXY = pd.DataFrame(COVXY, columns = ['Unnamed: 0', 'COV'])
    tbl_COVXY.set_index('Unnamed: 0', inplace = True)
    return tbl_COVXY
# Step 1: build wonderful.csv with one covariance column per constituent stock.
# Futures quotes
tbl_IH = pd.read_csv('期货历史行情数据_上证50(IH9999).csv')
# Table that is finally saved: one row per futures timestamp
tbl_Save = pd.DataFrame(index = tbl_IH['Unnamed: 0'])
tbl_Save.to_csv('./wonderful.csv')
# Files that are not constituent-stock quote files
skip_names = {
    '期货历史行情数据_上证50(IH9999).csv',
    '指数历史行情数据_上证50.csv',
    '指数历史行情数据_上证50成分股.rar',
    'deal.py',
    'wonderful.csv',
}
for root, dirs, files in os.walk('.'):
    # Every constituent-stock quote file
    for name in files:
        # Only CSV files can be quote files; anything else (scripts,
        # archives, ...) would make read_csv fail or produce garbage
        if name in skip_names or not name.endswith('.csv'):
            continue
        # Quote file content; join with root because os.walk also descends
        # into subdirectories
        tbl_stock = pd.read_csv(os.path.join(root, name))
        # Stock code, taken from fixed character positions of the file name
        code_stock = name[17:26]
        # Time-covariance table for this stock
        df_Cov = CalcCovList(code_stock, tbl_IH, tbl_stock)
        # Reload the checkpoint, add this stock's column (aligned on the time
        # index) and save again so finished stocks survive an interruption
        tbl_Save = pd.read_csv('wonderful.csv', index_col = 'Unnamed: 0')
        tbl_Save[code_stock] = df_Cov['COV']
        tbl_Save.to_csv('wonderful.csv', index_label = 'Unnamed: 0')
        print('[ ' + code_stock + ' ]处理完成')
import numpy as np
import pandas as pd
import os
# Step 2: for every row of wonderful.csv find the stock with the largest
# covariance and attach that stock's open/close price at the same time.
# Covariance table (first column is the time key 'Unnamed: 0')
tbl_COV = pd.read_csv('wonderful.csv')
# open/close dictionary: stock code -> quotes indexed by time
tbl_OC = dict()
print('生成open close字典...', end = '', flush = True)
# Files that are not constituent-stock quote files
skip_names = {
    '期货历史行情数据_上证50(IH9999).csv',
    '指数历史行情数据_上证50.csv',
    '指数历史行情数据_上证50成分股.rar',
    'deal.py',
    'second.py',
    'third.py',
    'wonderful.csv',
}
# Collect the constituent stocks and their open/close tables
for root, dirs, files in os.walk('.'):
    # Every constituent-stock quote file
    for name in files:
        if name in skip_names or not name.endswith('.csv'):
            continue
        # Quote file content; join with root because os.walk also descends
        # into subdirectories
        tbl_stock = pd.read_csv(os.path.join(root, name))
        # Keep only time, close and open
        tbl_stock = tbl_stock[['Unnamed: 0', 'close', 'open']]
        # Index by time so each later lookup is a hash lookup instead of a
        # full scan; on duplicate times the first row wins
        tbl_stock = tbl_stock.drop_duplicates('Unnamed: 0').set_index('Unnamed: 0')
        # Stock code, taken from fixed character positions of the file name
        code_stock = name[17:26]
        tbl_OC[code_stock] = tbl_stock
print('ok')
# Every column after the time column holds one stock's covariances
stock_columns = list(tbl_COV.columns[1:])
cov_matrix = tbl_COV[stock_columns].values.astype(float)
times = tbl_COV['Unnamed: 0'].values
# Code of the stock with the largest covariance
mlbl = []
# Open price
OPEN = []
# Close price
CLOSE = []
for i in range(len(tbl_COV)):
    print('计算第 [ ' + str(i + 1) + ' / ' + str(len(tbl_COV)) + ' ]行...', end = '', flush = True)
    row = cov_matrix[i]
    if np.isnan(row).all():
        # No covariance available for this time
        tlbl = ''
    else:
        # First column holding the largest non-NaN covariance
        tlbl = stock_columns[np.nanargmax(row)]
    mlbl.append(tlbl)
    price_open = ''
    price_close = ''
    if tlbl != '':
        quotes = tbl_OC[tlbl]
        # Leave both prices blank when the stock has no quote at this time
        if times[i] in quotes.index:
            price_open = quotes.at[times[i], 'open']
            price_close = quotes.at[times[i], 'close']
    OPEN.append(price_open)
    CLOSE.append(price_close)
    print('ok')
tbl_COV['MAX'] = mlbl
tbl_COV['OPEN'] = OPEN
tbl_COV['CLOSE'] = CLOSE
# The row index is written as an extra, unlabeled first column and the time
# key stays a regular column. NOTE(review): the later Value step reads the
# time key back as 'Unnamed: 0.1', so this layout must not change.
tbl_COV.to_csv('wonderful.csv', index_label = '')
import numpy as np
import pandas as pd
# Step 3: walk through wonderful.csv and build the 'Value' series.
# Covariance table
tbl_COV = pd.read_csv('wonderful.csv')
tbl_IH = pd.read_csv('期货历史行情数据_上证50(IH9999).csv')
# Number of rows in the covariance table
len_COV = len(tbl_COV)
# New column holding the value; 'x' marks a row without a computed value
tmp = ['x'] * len_COV
if 'Value' not in tbl_COV.columns:
    tbl_COV['Value'] = tmp
# Initial value
value = 100.0
# Threshold K
K = 0.05
# Futures open/close indexed by time so each lookup is a hash lookup instead
# of a full scan; on duplicate times the first row wins
quotes_IH = tbl_IH.drop_duplicates('Unnamed: 0').set_index('Unnamed: 0')[['open', 'close']]
# The last row is skipped because every step needs the futures row of the
# following time (i + 1)
for i in range(len_COV - 1):
    print('计算第 [ ' + str(i + 1) + ' / ' + str(len(tbl_COV) - 1) + ' ] 行...', end = '', flush = True)
    max_code = tbl_COV['MAX'][i]
    old_value = tbl_COV['Value'][i]
    # Carry the stored cell over unless a new value is computed below
    tmp[i] = old_value
    # Compare by value: identity checks against 'x' / np.nan only work by
    # accident of object caching
    if pd.isna(max_code) or old_value != 'x':
        if old_value != 'x':
            # Resume from a value stored by an earlier run
            value = float(old_value)
        print('pass')
        continue
    # Futures row at the next time step
    next_time = tbl_COV['Unnamed: 0.1'][i + 1]
    if next_time not in quotes_IH.index:
        print('pass')
        continue
    # Relative open-to-close change of the selected stock at this time
    Q = (tbl_COV['CLOSE'][i] - tbl_COV['OPEN'][i]) / tbl_COV['CLOSE'][i]
    # Relative open-to-close change of the futures at the next time
    close_IH = quotes_IH.at[next_time, 'close']
    P = (close_IH - quotes_IH.at[next_time, 'open']) / close_IH
    if pd.isna(P) or pd.isna(Q):
        # Missing prices: without this guard NaN would fall into the
        # "opposite sign" branch and silently change the value
        print('pass')
        continue
    cov = tbl_COV.at[i, max_code]
    if cov > K:
        gain_on_same_sign = True
    elif cov < -K:
        gain_on_same_sign = False
    else:
        # |covariance| within the threshold: leave this row untouched
        print('pass')
        continue
    # P and Q have the same sign (zero counts as same sign)
    same_sign = (P >= 0 and Q >= 0) or (P <= 0 and Q <= 0)
    if same_sign == gain_on_same_sign:
        value *= (1 + abs(P))
    else:
        value *= (1 - abs(P))
    tmp[i] = value
    print('ok')
# NOTE: the original carried a disabled "save every 1000 new rows" block;
# the table is written once, after the loop.
tbl_COV['Value'] = tmp
tbl_COV.to_csv('wonderful.csv', index = False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment