@Channelchan
2017-11-25T07:25:53.000000Z
字数 3410
阅读 12825
# Load factor values and daily close prices for the 'hs300' code list.
from datetime import datetime

import alphalens
import pandas as pd
from fxdayu_data import DataAPI

start = datetime(2017, 1, 1)
end = datetime(2017, 11, 11)
period = 5  # period used below to select IC columns and plot quantile returns

codes = DataAPI.info.codes('hs300')
factors = ('PB', 'LCAP', 'HBETA', 'ROE', 'InvestCashGrowRate', 'ROC20')

# The printed Panel has items = asset codes, major_axis = timestamps,
# minor_axis = factor names.
PN = DataAPI.factor(codes, factors, start=start, end=end)
prices = DataAPI.candle(codes, 'D', 'close', start=start, end=end).minor_xs('close')
print(PN)
<class 'pandas.core.panel.Panel'>
Dimensions: 300 (items) x 207 (major_axis) x 6 (minor_axis)
Items axis: 000001.XSHE to 603993.XSHG
Major_axis axis: 2017-01-03 15:00:00 to 2017-11-10 15:00:00
Minor_axis axis: HBETA to ROE
横截面标准化/去极值/加干扰
from fxdayu_alphaman.factor.admin import Admin
from fxdayu_alphaman.factor.factor import Factor

# Swap the items and minor axes so that each panel item is one factor.
origin_factors = dict(PN.transpose(2, 1, 0).iteritems())

# Instantiate the factor helper.
f = Factor()

# Cross-sectional processing: winsorize first, then standardize.
PN_handle = pd.Panel({
    name: f.standardize(f.winsorize(frame))
    for name, frame in origin_factors.items()
})

# Add disturbance to every processed factor.
PN_disturbed = pd.Panel({
    name: f.get_disturbed_factor(frame)
    for name, frame in PN_handle.iteritems()
})
1_获取IC的DataFrame
2_获取IC权重的DataFrame
3_计算最优权重组合的新因子值的MultiIndex
f_admin = Admin()

# Each factor as a stacked Series; built once and passed to both Admin calls.
stacked_factors = {
    name: item.stack() for name, item in PN_disturbed.iteritems()
}

# DataFrame of ICs. `periods` is tied to `period` so that the `ic_df[period]`
# lookup below always refers to a period that was actually computed.
ic_df = f_admin.get_factors_ic_df(
    stacked_factors,
    pool=codes,
    start=start,
    end=end,
    price=prices,
    quantiles=5,
    periods=(period,),
)

# DataFrame of IC weights.
ic_weight_df = f_admin.get_ic_weight_df(ic_df[period], period, rollback_period=10)

# New factor values (MultiIndex) from the optimal-weight combination.
new_factor = f_admin.ic_cov_weighted_factor(
    stacked_factors, ic_weight_df=ic_weight_df
)
# Combined multi-factor values; print the last few rows.
factor = new_factor.multifactor_value
print(factor.tail())
factor
date asset
2017-11-10 15:00:00 601997.XSHG -0.338474
601998.XSHG 0.630329
603160.XSHG 2.109389
603858.XSHG 0.168304
603993.XSHG 1.060125
1_获取factor_data
2_计算mean_return_by_q, std_err_by_q
3_展示Quantile与IC图表
4_计算多空累积收益
# Build the alphalens factor_data table from the combined factor and prices.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor, prices, quantiles=5
)

# Mean return and standard error per quantile, computed by date.
mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(
    factor_data, by_date=True
)
import matplotlib.pyplot as plt

# Cumulative returns by quantile for the configured period.
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, period)
plt.show()
![output_10_1](http://static.zybuluo.com/Channelchan/zvl84dy512k6kv7urbhsf5uj/output_10_1.png)
# IC histogram.
ic = alphalens.performance.factor_information_coefficient(factor_data)
alphalens.plotting.plot_ic_hist(ic)

# Monthly mean IC heatmap.
mean_monthly_ic = alphalens.performance.mean_information_coefficient(
    factor_data, by_time='M'
)
alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
plt.show()
![output_11_0](http://static.zybuluo.com/Channelchan/kn45z5kqlnjcixhnzvri9bwf/output_11_0.png)
![output_11_1](http://static.zybuluo.com/Channelchan/26mgq95j5h4f6hpqdiynr900/output_11_1.png)
# Long-short cumulative returns.
factor_returns = alphalens.performance.factor_returns(factor_data)
# NOTE(review): this plots column 10 while the rest of the script works with
# period = 5 — confirm the 10-period column is the intended one.
alphalens.plotting.plot_cumulative_returns(factor_returns[10])
plt.show()
![output_12_0](http://static.zybuluo.com/Channelchan/5033xmltwyaysxlebupvjxku/output_12_0.png)
import numpy as np

big_factor = ['LCAP', 'ROE']


def cal_big(ic_df, threshold=0.03):
    """Flag which of the ``big_factor`` columns reach the IC threshold.

    Args:
        ic_df: DataFrame containing at least the columns listed in
            ``big_factor``.
        threshold: Minimum IC for a cell to be flagged. Defaults to 0.03.

    Returns:
        A new DataFrame restricted to the ``big_factor`` columns, holding
        1.0 where the IC is >= ``threshold`` and NaN everywhere else
        (cells that were already NaN stay NaN). ``ic_df`` is not modified.
    """
    passed = ic_df[big_factor] >= threshold
    # Build the result from the boolean mask rather than assigning into a
    # column selection, which is the chained-assignment pattern pandas warns
    # about.
    return passed.astype(float).where(passed)
def big_select(rows, n=30):
    """Pick the ``n`` largest-valued assets for every factor flagged in a row.

    Args:
        rows: ``(label, row)`` pair as yielded by ``DataFrame.iterrows()``.
            The non-NaN entries of ``row`` name the factors to query.
        n: Number of top assets kept per factor. Defaults to 30.

    Returns:
        Dict mapping each selected asset code to 1; empty when the row has no
        non-NaN entry.
    """
    _label, flags = rows
    wanted = flags.dropna().index
    # len() instead of truthiness: bool() of a pandas Index raises ValueError.
    if len(wanted) == 0:
        return {}

    # NOTE(review): the row label is never passed to DataAPI.factor, so every
    # row issues the same `length=1` query — confirm whether the query should
    # be pinned to the row's date.
    target = DataAPI.factor(codes, wanted, length=1).iloc[:, -1, :].T

    selected = {}
    for _name, values in target.iteritems():
        selected.update(dict.fromkeys(values.nlargest(n).index, 1))
    return selected
# Run big_select over every row of the thresholded period-5 IC frame, then
# assemble the per-row selections into a frame indexed by the panel timestamps.
flag_rows = cal_big(ic_df[5]).iterrows()
selections = [big_select(pair) for pair in flag_rows]
select_big = pd.DataFrame(selections, index=PN.major_axis[:])

# Long format: one entry per (timestamp, asset code) that was selected.
big_result = select_big.stack()
# Imported under its own alias so that the class is not shadowed by the
# instance and the factor-side `Admin` import stays untouched.
from fxdayu_alphaman.selector.admin import Admin as SelectorAdmin

s_admin = SelectorAdmin()
big_performance = s_admin.calculate_performance(
    "Roll_IC",
    big_result[big_result > 0],  # keep only entries > 0 (the selected assets)
    start,
    end,
    periods=(5,),
)
import alphalens


def plot_performance(performance, period=5):
    """Plot cumulative returns by quantile from a performance result.

    Args:
        performance: Object supporting ``performance["mean_return"]``; that
            entry is passed to alphalens as the quantile returns.
        period: Forwarded as the ``period`` argument of the alphalens plot.
            Defaults to 5, the value that was previously hard-coded.
    """
    alphalens.plotting.plot_cumulative_returns_by_quantile(
        performance["mean_return"], period=period
    )
    plt.show()


plot_performance(big_performance)

