Last active
December 29, 2019 04:01
-
-
Save InJeCTrL/90bb88c376205cc2cbca998bd032e1a0 to your computer and use it in GitHub Desktop.
上证50成分股逐窗口协方差计算
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
| import os | |
def CalcCovList(code_stock, tbl_IH, tbl_stock, W = 20, n = 1, Unit = 1):
    '''
    Compute the sliding-window covariance between a constituent stock's
    close prices and the index-futures close prices.

    Both tables are first trimmed so that they start at a common timestamp
    (column 'Unnamed: 0'). After that, rows are paired purely by position,
    not by timestamp.

    code_stock: stock code; only used in the progress messages
    tbl_IH: futures quotes, with columns 'Unnamed: 0' (time) and 'close'
    tbl_stock: stock quotes, with columns 'Unnamed: 0' (time) and 'close'
    W: window size (number of rows, i.e. minutes, covered by each window)
    n: number of rows by which the futures window lags the stock window
       (expected to be >= 0)
    Unit: number of rows the window slides per step; the same stride is
          also used to sample the rows inside each window
    Return: DataFrame indexed by 'Unnamed: 0' with a single column 'COV'.
            Each row is labelled with the stock timestamp of the row that
            immediately follows its window. The frame is empty when the
            tables do not overlap or are shorter than one window.
    '''
    time_col = 'Unnamed: 0'
    print('开始计算[ ' + code_stock + ' ]')

    times_S = close_S = close_I = None
    num_wnd = 0
    if len(tbl_IH) > 0:
        # Stock rows from the first futures timestamp onwards. Positional
        # access (.iloc) keeps this working for any index on the inputs.
        tbl_S = tbl_stock[tbl_stock[time_col] >= tbl_IH[time_col].iloc[0]]
        if len(tbl_S) > 0:
            # Futures rows from the first remaining stock timestamp onwards.
            tbl_I = tbl_IH[tbl_IH[time_col] >= tbl_S[time_col].iloc[0]]
            times_S = tbl_S[time_col]
            # Select 'close' by name rather than by column position, and
            # pull it out once so each window is a cheap array slice.
            close_S = tbl_S['close'].to_numpy()
            close_I = tbl_I['close'].to_numpy()
            # Number of window start positions that fit in both tables.
            num_wnd = min(len(close_S) - W, len(close_I) - W - n)

    # [time, covariance] pairs
    rows = []
    # Number of windows actually evaluated (differs from num_wnd if Unit > 1).
    total = len(range(0, num_wnd, Unit))
    # Last progress percentage (multiple of 5) that was printed.
    reported = 0
    for done, i_wnd in enumerate(range(0, num_wnd, Unit), start=1):
        # Stock window, and the futures window shifted by n rows.
        win_S = close_S[i_wnd + Unit - 1:i_wnd + W:Unit]
        win_I = close_I[i_wnd + Unit + n - 1:i_wnd + W + n:Unit]
        # np.cov returns the 2x2 covariance matrix; [0][1] is COV(X, Y).
        rows.append([times_S.iloc[i_wnd + W], np.cov(win_S, win_I)[0][1]])
        # Integer arithmetic: report each time a new 5% step is reached.
        step = done * 100 // total // 5 * 5
        if step > reported:
            reported = step
            print(str(float(step)) + '%')

    tbl_COVXY = pd.DataFrame(rows, columns=[time_col, 'COV'])
    tbl_COVXY.set_index(time_col, inplace = True)
    return tbl_COVXY
# Index-futures quotes.
tbl_IH = pd.read_csv('期货历史行情数据_上证50(IH9999).csv')
# Result table: one row per futures timestamp, one column per stock.
tbl_Save = pd.DataFrame(index = tbl_IH['Unnamed: 0'])
tbl_Save.to_csv('./wonderful.csv')
# Files in the data directory that are not per-stock quote files.
skip_names = {
    '期货历史行情数据_上证50(IH9999).csv',
    '指数历史行情数据_上证50.csv',
    '指数历史行情数据_上证50成分股.rar',
    'deal.py',
    'wonderful.csv',
}
for root, dirs, files in os.walk('.'):
    # Every constituent-stock quote file
    for name in files:
        # Only CSV files can be quote files; this also keeps the sibling
        # scripts from being passed to read_csv.
        if name in skip_names or not name.endswith('.csv'):
            continue
        # os.walk yields bare file names, so join with the directory being
        # walked; otherwise files in subdirectories cannot be opened.
        tbl_stock = pd.read_csv(os.path.join(root, name))
        # Stock code, taken from a fixed position in the file name.
        code_stock = name[17:26]
        # Time -> covariance table for this stock.
        df_Cov = CalcCovList(code_stock, tbl_IH, tbl_stock)
        # Aligned on the timestamp index; timestamps without a covariance
        # stay empty.
        tbl_Save[code_stock] = df_Cov['COV']
        # Save after every stock so partial results survive an interruption.
        tbl_Save.to_csv('wonderful.csv')
        print('[ ' + code_stock + ' ]处理完成')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
| import os | |
# Covariance table: first column is the timestamp ('Unnamed: 0'), every
# following column holds one stock's covariances.
tbl_COV = pd.read_csv('wonderful.csv')
# Per stock code: {timestamp: (open, close)}
tbl_OC = dict()
print('生成open close字典...', end = '', flush = True)
# Files in the data directory that are not per-stock quote files.
skip_names = {
    '期货历史行情数据_上证50(IH9999).csv',
    '指数历史行情数据_上证50.csv',
    '指数历史行情数据_上证50成分股.rar',
    'deal.py',
    'second.py',
    'third.py',
    'wonderful.csv',
}
# Collect the open/close prices of every constituent stock.
for root, dirs, files in os.walk('.'):
    # Every constituent-stock quote file
    for name in files:
        if name in skip_names or not name.endswith('.csv'):
            continue
        # os.walk yields bare file names, so join with the directory being
        # walked; otherwise files in subdirectories cannot be opened.
        tbl_stock = pd.read_csv(os.path.join(root, name))
        # Only the timestamp, close and open columns are needed.
        tbl_stock = tbl_stock[['Unnamed: 0', 'close', 'open']]
        # Stock code, taken from a fixed position in the file name.
        code_stock = name[17:26]
        # Keep the first row per timestamp and index it by timestamp so
        # each later lookup is a dict access instead of a full table scan.
        first_rows = tbl_stock.drop_duplicates(subset='Unnamed: 0')
        tbl_OC[code_stock] = dict(zip(
            first_rows['Unnamed: 0'],
            zip(first_rows['open'].to_numpy(), first_rows['close'].to_numpy()),
        ))
print('ok')

# All columns after the timestamp column are stock covariance columns.
stock_cols = list(tbl_COV.columns[1:])
if stock_cols:
    cov_values = tbl_COV[stock_cols].to_numpy(dtype=float)
    missing = np.isnan(cov_values)
    # Position of the largest covariance in each row. Missing values are
    # treated as -inf so they never win; ties go to the leftmost column.
    best_col = np.where(missing, -np.inf, cov_values).argmax(axis=1)
    # Rows that contain at least one covariance value.
    has_value = ~missing.all(axis=1)
else:
    best_col = np.zeros(len(tbl_COV), dtype=int)
    has_value = np.zeros(len(tbl_COV), dtype=bool)

# Code of the stock with the largest covariance ('' when the row has none)
mlbl = []
# Open price of that stock at the row's timestamp ('' when unavailable)
OPEN = []
# Close price of that stock at the row's timestamp ('' when unavailable)
CLOSE = []
for i, stamp in enumerate(tbl_COV['Unnamed: 0'].tolist()):
    print('计算第 [ ' + str(i + 1) + ' / ' + str(len(tbl_COV)) + ' ]行...', end = '', flush = True)
    tlbl = stock_cols[best_col[i]] if has_value[i] else ''
    mlbl.append(tlbl)
    prices = tbl_OC[tlbl].get(stamp) if tlbl != '' else None
    if prices is None:
        OPEN.append('')
        CLOSE.append('')
    else:
        OPEN.append(prices[0])
        CLOSE.append(prices[1])
    print('ok')
tbl_COV['MAX'] = mlbl
tbl_COV['OPEN'] = OPEN
tbl_COV['CLOSE'] = CLOSE
# The default integer index is written as an extra unnamed first column.
# This layout is kept on purpose: the follow-up script reads the timestamp
# column back as 'Unnamed: 0.1'.
tbl_COV.to_csv('wonderful.csv', index_label = '')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
# Covariance table, already extended with the MAX / OPEN / CLOSE columns.
tbl_COV = pd.read_csv('wonderful.csv')
tbl_IH = pd.read_csv('期货历史行情数据_上证50(IH9999).csv')
# Number of rows in the covariance table
len_COV = len(tbl_COV)
# New contents of the 'Value' column; 'x' marks a row not computed yet.
tmp = ['x'] * len_COV
if 'Value' not in tbl_COV.columns:
    tbl_COV['Value'] = tmp
# Starting value
value = 100.0
# Threshold K on the covariance
K = 0.05

# First futures bar per timestamp as {timestamp: (open, close)}, so each
# row needs one dict lookup instead of a scan over the whole futures table.
ih_first = tbl_IH.drop_duplicates(subset='Unnamed: 0')
ih_bars = dict(zip(
    ih_first['Unnamed: 0'],
    zip(ih_first['open'].to_numpy(), ih_first['close'].to_numpy()),
))

# The last row is never evaluated because every row uses the futures bar
# at the timestamp of the following row.
for i in range(len_COV - 1):
    print('计算第 [ ' + str(i + 1) + ' / ' + str(len(tbl_COV) - 1) + ' ] 行...', end = '', flush = True)
    best_code = tbl_COV.at[i, 'MAX']
    stored = tbl_COV.at[i, 'Value']
    # Compare by value; identity checks against 'x' / NaN are unreliable.
    pending = isinstance(stored, str) and stored == 'x'
    if pd.notna(best_code) and pending:
        # Relative open-to-close move of the selected stock in this row.
        Q = (tbl_COV.at[i, 'CLOSE'] - tbl_COV.at[i, 'OPEN']) / tbl_COV.at[i, 'CLOSE']
        # Relative open-to-close move of the futures at the next timestamp.
        bar = ih_bars.get(tbl_COV.at[i + 1, 'Unnamed: 0.1'])
        P = (bar[1] - bar[0]) / bar[1] if bar is not None else np.nan
        cov = tbl_COV.at[i, best_code]
        # +1: covariance above K, -1: below -K, 0: inside the band (or NaN).
        if cov > K:
            direction = 1
        elif cov < -K:
            direction = -1
        else:
            direction = 0
        # Rows with a missing futures bar or missing prices are skipped and
        # stay marked 'x', instead of crashing or being counted as an
        # opposite-sign move.
        if direction != 0 and np.isfinite(P) and np.isfinite(Q):
            # Zero counts as having the same sign as anything.
            same_sign = (P >= 0 and Q >= 0) or (P <= 0 and Q <= 0)
            # Gain when the sign relation matches the covariance direction,
            # loss otherwise.
            if same_sign == (direction > 0):
                value *= (1 + abs(P))
            else:
                value *= (1 - abs(P))
            tmp[i] = value
            print('ok')
        else:
            tmp[i] = stored
            print('pass')
    else:
        tmp[i] = stored
        # Resume from a value computed by an earlier run.
        if not pending:
            value = float(stored)
        print('pass')
tbl_COV['Value'] = tmp
tbl_COV.to_csv('wonderful.csv', index = False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment