@Channelchan
2018-03-13T06:22:10.000000Z
字数 4331
阅读 26427
# Standard library
import warnings
from datetime import datetime

# Third-party
import alphalens
import numpy as np
import pandas as pd
from jaqs.data import DataView
from jaqs.data.dataapi import DataApi

# Silence every warning category for the rest of the session (notebook
# convenience; remove this while debugging).
warnings.filterwarnings("ignore")

# Folder containing the previously saved dataview.
dataview_folder = 'JAQS_Data/hs300'

# Load the saved dataview; every later cell reads its fields through `dv`.
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_columns_index(signal):
    """Re-label a frame's columns and index for downstream analysis.

    Column labels ending in ``SZ`` have that suffix replaced by ``XSHE`` and
    labels ending in ``SH`` have it replaced by ``XSHG``; any other label is
    left untouched. The index entries, expected to read as ``YYYYMMDD`` once
    converted with ``str()``, are turned into a ``DatetimeIndex``.

    Parameters
    ----------
    signal : pandas.DataFrame
        Frame with string column labels and ``YYYYMMDD`` index entries.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed columns and a datetime index. The frame
        passed in is not modified.

    Raises
    ------
    ValueError
        If an index entry does not match the ``%Y%m%d`` format.
    """
    suffix_map = {'SZ': 'XSHE', 'SH': 'XSHG'}

    new_names = {}
    for c in signal.columns:
        for old_suffix, new_suffix in suffix_map.items():
            if c.endswith(old_suffix):
                # Swap only the trailing suffix; str.replace would also
                # rewrite the same letters appearing earlier in the label.
                new_names[c] = c[:-len(old_suffix)] + new_suffix
                break

    # DataFrame.rename is the supported way to map labels; it returns a new
    # frame, so the index assignment below never touches the caller's object.
    signal = signal.rename(columns=new_names)
    signal.index = pd.to_datetime([str(x) for x in signal.index], format="%Y%m%d")
    return signal
# Names of the dataview fields analysed as factors.
all_factor = [
    'pb',
    'roe',
    'roa',
    'price_div_dps',
    'ps_ttm',
    'pe_ttm',
]

# Factor name -> its time-series frame from the dataview, re-labelled by
# change_columns_index.
origin_factors = dict(
    (name, change_columns_index(dv.get_ts(name))) for name in all_factor
)
from jaqs.research.signaldigger import process

# Pre-process every factor frame: winsorize (clip extreme values), z-score
# standardize, then add disturbance values.
# NOTE: pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so this
# cell requires an older pandas release.
PN_disturbed = pd.Panel({
    name: process.get_disturbed_factor(
        process.standardize(process.winsorize(frame))
    )
    for name, frame in origin_factors.items()
})
print(PN_disturbed)
<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1453 (major_axis) x 539 (minor_axis)
Items axis: pb to roe
Major_axis axis: 2012-01-04 00:00:00 to 2017-12-22 00:00:00
Minor_axis axis: 000001.XSHE to 603993.XSHG
# Price frame built from the dataview's `close_adj` field (presumably the
# adjusted close - confirm against the dataview), re-labelled the same way
# as the factor frames.
prices = change_columns_index(dv.get_ts('close_adj'))

# NOTE: this cell previously also called
# alphalens.performance.mean_information_coefficient() with no arguments.
# That raises TypeError because `factor_data` is required; the real call,
# with arguments, is made inside cal_monthly_ic.
def cal_monthly_ic(factor_df):
    """Return the mean information coefficient of one factor, grouped by month.

    The frame is stacked into a single long series, combined with the
    module-level ``prices`` frame by alphalens into forward-return data using
    5 quantiles, and the resulting IC is averaged with ``by_time='M'``.

    Parameters
    ----------
    factor_df : pandas.DataFrame
        Factor values; ``factor_df.stack()`` is what alphalens receives.

    Returns
    -------
    pandas.DataFrame
        Whatever ``alphalens.performance.mean_information_coefficient``
        returns: the captured output shows one row per month end and one
        column per forward-return period (1, 5 and 10).
    """
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
        factor_df.stack(), prices, quantiles=5
    )
    return alphalens.performance.mean_information_coefficient(
        factor_data, by_time='M'
    )


# Monthly IC table for every factor held in the panel.
monthly_ic = {key: cal_monthly_ic(value) for key, value in PN_disturbed.iteritems()}
print(list(monthly_ic.values())[0].tail())
1 5 10
date
2017-08-31 -0.001214 0.062826 0.089109
2017-09-30 0.032383 0.094269 0.137698
2017-10-31 0.041499 0.111624 0.134414
2017-11-30 -0.040708 -0.087116 -0.116298
2017-12-31 0.094886 0.264356 0.234456
# Summarise each factor's monthly IC table: one row per factor, one column
# per forward-return period.
monthly_ic_mean = pd.DataFrame(
    [frame.mean() for frame in monthly_ic.values()],
    index=list(monthly_ic.keys()),
)
monthly_ic_std = pd.DataFrame(
    [frame.std() for frame in monthly_ic.values()],
    index=list(monthly_ic.keys()),
)
print(monthly_ic_mean)
print(monthly_ic_std)
1 5 10
pb -0.013308 -0.010537 -0.014940
pe_ttm -0.008865 -0.010849 -0.016026
price_div_dps -0.004332 -0.003527 -0.006607
ps_ttm -0.004566 -0.002501 -0.004181
roa 0.008087 0.018150 0.023119
roe 0.006245 0.013959 0.020110
1 5 10
pb 0.055441 0.123580 0.159507
pe_ttm 0.037962 0.083811 0.112212
price_div_dps 0.029059 0.064610 0.086014
ps_ttm 0.044336 0.097744 0.127836
roa 0.034680 0.080584 0.105429
roe 0.030441 0.070708 0.097227
import matplotlib.pyplot as plt
import numpy as np

# Horizontal bar chart of the mean monthly IC per factor, with the monthly
# standard deviation drawn as error bars, for each forward-return period.
fig, ax = plt.subplots(figsize=(15, 7))

N = len(monthly_ic_mean.index)  # one group of bars per factor (was hard-coded to 6)
ind = np.arange(N)  # the y locations for the groups (bars are horizontal)
width = 0.3  # vertical offset used to spread the bars within a group

ind_name = tuple(monthly_ic_mean.index)
y_pos = np.arange(len(ind))

# Columns 0, 1, 2 of the summary tables are the first, second and third
# forward-return periods (1, 5 and 10 in the captured output).
one_mean = monthly_ic_mean.iloc[:, 0]
one_std = monthly_ic_std.iloc[:, 0]
five_mean = monthly_ic_mean.iloc[:, 1]
five_std = monthly_ic_std.iloc[:, 1]
ten_mean = monthly_ic_mean.iloc[:, 2]
ten_std = monthly_ic_std.iloc[:, 2]

ax.barh(ind - width, one_mean, align='edge', height=0.2, xerr=one_std,
        label='one_day', color='r')
ax.barh(ind - 0.05, five_mean, align='edge', height=0.2, xerr=five_std,
        label='five_day', color='y')
ax.barh(ind + width, ten_mean, align='center', height=0.2, xerr=ten_std,
        label='ten_day', color='b')

ax.set_yticks(y_pos)
ax.set_yticklabels(ind_name)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('mean_IC')
ax.set_title('HS300_IC in Different Period')
plt.legend()
plt.show()

# Stack the per-factor monthly IC tables into one panel so a single
# forward-return period can be sliced across all factors.
# NOTE: pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so this
# cell requires an older pandas release.
PN_IC = pd.Panel(monthly_ic)

# Cross-section for the 5-period column, computed once and reused below.
five_day_ic = PN_IC.minor_xs(5)

fig = plt.figure(figsize=(15, 7))
plt.plot(five_day_ic)
# Dashed reference line at IC = 0.02 across the whole date range.
plt.hlines(0.02, five_day_ic.index[0], five_day_ic.index[-1],
           linestyles='dashed', alpha=0.5)
plt.legend(five_day_ic.columns)
plt.title('FiveDay_IC')
plt.show()

def IR(df):
    """Return the information ratio: mean divided by standard deviation.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        IC values; for a DataFrame the ratio is computed per column.

    Returns
    -------
    pandas.Series or float
        ``df.mean() / df.std()``.
    """
    return df.mean() / df.std()


# Information ratio of every factor for the 1-, 5- and 10-period IC series.
print('OneDay_IR:\n', IR(PN_IC.minor_xs(1)))
print('FiveDay_IR:\n', IR(PN_IC.minor_xs(5)))
print('TenDay_IR:\n', IR(PN_IC.minor_xs(10)))
OneDay_IR:
pb -0.240034
pe_ttm -0.233529
price_div_dps -0.149082
ps_ttm -0.102978
roa 0.233188
roe 0.205164
dtype: float64
FiveDay_IR:
pb -0.085268
pe_ttm -0.129441
price_div_dps -0.054597
ps_ttm -0.025585
roa 0.225227
roe 0.197414
dtype: float64
TenDay_IR:
pb -0.093661
pe_ttm -0.142818
price_div_dps -0.076814
ps_ttm -0.032705
roa 0.219280
roe 0.206836
dtype: float64
