@Channelchan
2017-12-02T10:17:33.000000Z
字数 6587
阅读 14726
用数学公式找到Alpha机会
WorldQuant通过数据挖掘的方法发掘了101个alpha,据说其中80%的因子至今仍然有效,并运行在他们的投资策略中。Alpha101论文给出了这101个真实量化交易Alpha的显式公式,这些公式同时也可以直接当作计算机代码使用。这些Alpha的平均持有期大约在0.6至6.4天之间,两两之间的平均相关性较低,为15.9%。收益与波动率强相关,但对换手率没有明显的依赖性,这直接印证了此前通过间接经验分析得到的结论。我们还从经验上进一步发现,换手率对alpha相关性的解释能力很差。
PDF下载:
Python代码下载:
详细参考原文PDF
# 1. Helper algorithms needed by the factor functions
import numpy as np
import pandas as pd
import talib as ta
from scipy.stats import rankdata
from fxdayu_data import DataAPI
from datetime import datetime
import alphalens
from fxdayu_alphaman.factor.factor import Factor
import matplotlib.pyplot as plt


# Functions used when computing alpha101.
# Unless noted otherwise, `df` is a pandas object supporting `.rolling()`,
# and `window` / `period` count rows.

def ts_sum(df, window=10):
    """Rolling sum over the last `window` rows."""
    return df.rolling(window).sum()


def ts_mean(df, window=10):
    """Rolling mean over the last `window` rows."""
    return df.rolling(window).mean()


def stddev(df, window=10):
    """Rolling standard deviation over the last `window` rows."""
    return df.rolling(window).std()


def correlation(x, y, window=10):
    """Rolling correlation between `x` and `y` over `window` rows."""
    return x.rolling(window).corr(y)


def covariance(x, y, window=10):
    """Rolling covariance between `x` and `y` over `window` rows."""
    return x.rolling(window).cov(y)


def rolling_rank(na):
    """Return the rank of the last value of `na` among all values of `na`."""
    return rankdata(na)[-1]


def ts_rank(df, window=10):
    """Rolling rank: rank of the newest value inside each window."""
    return df.rolling(window).apply(rolling_rank)


def rolling_prod(na):
    """Return the product of all values in one rolling window `na`."""
    # The previous implementation called `na.prod(na)`, which passes the
    # window itself as the `axis` argument and fails; np.prod multiplies
    # the window's elements as intended.
    return np.prod(na)


def product(df, window=10):
    """Rolling product over the last `window` rows."""
    return df.rolling(window).apply(rolling_prod)


def ts_min(df, window=10):
    """Rolling minimum over the last `window` rows."""
    return df.rolling(window).min()


def ts_max(df, window=10):
    """Rolling maximum over the last `window` rows."""
    return df.rolling(window).max()


def delta(df, period=1):
    """Difference between each row and the row `period` steps earlier."""
    return df.diff(period)


def delay(df, period=1):
    """Shift the data down by `period` rows (lagged values)."""
    return df.shift(period)


def rank(df):
    """Percentile rank across columns (axis=1) within each row."""
    return df.rank(axis=1, pct=True)


def scale(df, k=1):
    """Multiply `df` by `k` and divide by the sum of absolute values.

    The absolute sum is taken with the default axis of `.sum()`, i.e. down
    each column.
    """
    return df.mul(k).div(np.abs(df).sum())


def ts_argmax(df, window=10):
    """1-based position of the maximum inside each rolling window."""
    return df.rolling(window).apply(np.argmax) + 1


def ts_argmin(df, window=10):
    """1-based position of the minimum inside each rolling window."""
    return df.rolling(window).apply(np.argmin) + 1
# 2. Class that computes the alpha values
class alphas(object):
    """Compute Alpha101 factors #1 to #10 from a panel of daily bars.

    Each ``alphaNNN`` method returns a pandas object derived from the
    price/volume frames built in ``__init__``. The formula quoted above each
    method is the published Alpha101 expression it implements.
    """

    def __init__(self, pn_data):
        """
        :param pn_data: pandas.Panel whose minor axis provides 'open',
            'high', 'low', 'close' and 'volume'.

        Note: when the panel contains missing values it is forward-filled
        in place, so the caller's object is modified.
        """
        # Fetch the historical data
        if pn_data.isnull().values.any():
            pn_data.fillna(method='ffill', inplace=True)
        self.open = pd.DataFrame(pn_data.minor_xs('open'), dtype=np.float64)
        self.high = pd.DataFrame(pn_data.minor_xs('high'), dtype=np.float64)
        self.low = pd.DataFrame(pn_data.minor_xs('low'), dtype=np.float64)
        self.close = pd.DataFrame(pn_data.minor_xs('close'), dtype=np.float64)
        self.volume = pd.DataFrame(pn_data.minor_xs('volume'), dtype=np.float64)
        self.returns = pd.DataFrame(self.close.pct_change())
        # 10-row rolling mean of volume
        self.adv = ts_mean(self.volume, 10)
        # VWAP is approximated from bars as
        # rolling sum(close * volume) / rolling sum(volume) over 10 rows.
        self.vwap = ts_sum(self.close * self.volume, 10) / ts_sum(self.volume, 10)

    # 3. Factor functions

    # alpha001: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
    def alpha001(self):
        # Work on a copy: the masked assignment below would otherwise
        # overwrite self.close and corrupt every factor computed afterwards.
        inner = self.close.copy()
        inner[self.returns < 0] = stddev(self.returns, 20)
        # NOTE: the constant "- 0.5" of the published formula is not applied
        # here; the returned value is the plain cross-sectional rank.
        alpha = rank(ts_argmax(inner ** 2, 5))
        return alpha

    # alpha002: (-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
    def alpha002(self):
        alpha = -1 * correlation(rank(delta(np.log(self.volume), 2)),
                                 rank((self.close - self.open) / self.open), 6)
        # Infinite correlations are treated as missing.
        return alpha.replace([-np.inf, np.inf], np.nan)

    # alpha003: (-1 * correlation(rank(open), rank(volume), 10))
    def alpha003(self):
        alpha = -1 * correlation(rank(self.open), rank(self.volume), 10)
        # Infinite correlations are treated as missing.
        return alpha.replace([-np.inf, np.inf], np.nan)

    # alpha004: (-1 * Ts_Rank(rank(low), 9))
    def alpha004(self):
        alpha = -1 * ts_rank(rank(self.low), 9)
        return alpha

    # alpha005: (rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
    def alpha005(self):
        alpha = (rank((self.open - (ts_sum(self.vwap, 10) / 10))) *
                 (-1 * np.abs(rank((self.close - self.vwap)))))
        return alpha

    # alpha006: (-1 * correlation(open, volume, 10))
    def alpha006(self):
        alpha = -1 * correlation(self.open, self.volume, 10)
        return alpha

    # alpha007: ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1 * 1))
    def alpha007(self):
        adv20 = ts_mean(self.volume, 20)
        alpha = -1 * ts_rank(abs(delta(self.close, 7)), 60) * np.sign(delta(self.close, 7))
        # Else-branch of the ternary: wherever adv20 >= volume the factor is -1.
        alpha[adv20 >= self.volume] = -1
        return alpha

    # alpha008: (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)), 10))))
    def alpha008(self):
        alpha = -1 * (rank(((ts_sum(self.open, 5) * ts_sum(self.returns, 5)) -
                            delay((ts_sum(self.open, 5) * ts_sum(self.returns, 5)), 10))))
        return alpha

    # alpha009: ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (-1 * delta(close, 1))))
    def alpha009(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 5) > 0
        cond_2 = ts_max(delta_close, 5) < 0
        # Both ternary branches yield delta_close, so they are merged with
        # "|"; everything else keeps the default -delta_close.
        alpha = -1 * delta_close
        alpha[cond_1 | cond_2] = delta_close
        return alpha

    # alpha010: rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0) ? delta(close, 1) : (-1 * delta(close, 1)))))
    def alpha010(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 4) > 0
        cond_2 = ts_max(delta_close, 4) < 0
        alpha = -1 * delta_close
        alpha[cond_1 | cond_2] = delta_close
        # The published formula wraps the whole expression in rank(); the
        # previous code returned the un-ranked value.
        return rank(alpha)
# 4. Load the stock-pool data
if __name__ == '__main__':
    # Sample window for the study
    start = datetime(2017, 1, 1)
    end = datetime(2017, 11, 11)

    # Codes returned by DataAPI for 'hs300', then their 'D' candles
    codes = DataAPI.info.codes('hs300')
    pn = DataAPI.candle(codes, 'D', start=start, end=end)

    # Close prices, used later to compute forward returns
    prices = pn.minor_xs('close')
alpha = alphas(pn)

# Evaluate alpha001 ... alpha010 in order and key each result by its English
# ordinal name ('one' -> alpha001, ..., 'ten' -> alpha010).
factors = {
    label: getattr(alpha, 'alpha%03d' % number)()
    for number, label in enumerate(
        ('one', 'two', 'three', 'four', 'five',
         'six', 'seven', 'eight', 'nine', 'ten'),
        start=1,
    )
}

f = Factor()
factors_disturbed = {}
for name, frame in factors.items():
    factors_disturbed[name] = f.get_disturbed_factor(frame)


def cal_monthly_ic(factor):
    """Return alphalens' mean information coefficient of `factor`, by month.

    `factor` is stacked and passed with the module-level `prices` to
    alphalens, using 5 quantiles and forward periods of 1, 5, 10, 20 and 60.
    """
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
        factor.stack(),
        prices,
        quantiles=5,
        periods=(1, 5, 10, 20, 60),
    )
    return alphalens.performance.mean_information_coefficient(factor_data, by_time='M')


monthly_ic = {}
for key, value in factors_disturbed.items():
    monthly_ic[key] = cal_monthly_ic(value)
# Average every factor's monthly IC table; one row per factor, labelled by
# the factor's key in `monthly_ic`.
monthly_ic_mean = pd.DataFrame(
    [frame.mean() for frame in monthly_ic.values()],
    monthly_ic.keys(),
)
print(monthly_ic_mean)
1 5 10 20 60
one -0.011799 -0.007922 -0.005979 0.012213 0.025780
two 0.013996 0.004542 -0.008711 -0.016913 -0.023616
three 0.006315 0.026195 0.045510 0.029002 0.053809
four 0.025770 0.033320 0.017888 0.002327 0.007212
five 0.006262 -0.006906 -0.032523 -0.019436 -0.013545
six 0.010031 0.056760 0.075251 0.041658 0.080865
seven -0.006032 0.015775 0.002359 0.006403 0.011402
eight 0.009432 0.024283 0.018409 -0.014471 -0.017339
nine 0.013133 0.013432 0.012541 0.001124 0.005285
ten 0.008071 0.010746 0.014631 0.003472 0.006771
import seaborn as sns

# Annotated heatmap of the mean monthly IC, rounded to two decimals.
plt.figure(figsize=(10, 10))
rounded_ic = monthly_ic_mean.round(2)
sns.heatmap(rounded_ic, annot=True, square=True)
plt.show()

# Take the disturbed factor stored under 'six' and stack it for alphalens.
factor = factors_disturbed.get('six').stack()

# Build alphalens' factor/forward-return table with 5 quantiles.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor,
    prices,
    quantiles=5,
)

# Per-date mean return (and its standard error) of every quantile.
mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(
    factor_data,
    by_date=True,
)

# Cumulative return curves per quantile; 10 is passed as the period argument.
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 10)
plt.show()

下载Alpha101完整代码研究,并设计有效的Alpha因子,导入Alphalens计算绩效。