@Channelchan
2018-01-30T10:56:13.000000Z
字数 7851
阅读 24211
import warnings
from datetime import datetime

import alphalens
import numpy as np
import pandas as pd
from jaqs.data import DataView
from jaqs.data.dataapi import DataApi

warnings.filterwarnings("ignore")

# Load the previously saved dataview from the local folder.
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def mask_index_member():
    """Return a boolean frame that is True where ``index_member`` equals 0.

    Reads the ``index_member`` field from the module-level dataview ``dv``.
    The original author's note describes this as the signal filter for
    stocks that are not index constituents.
    """
    df_index_member = dv.get_ts('index_member')
    return df_index_member == 0


def limit_up_down():
    """Return ``(can_enter, can_exit)`` tradability frames.

    Registers two formula fields on the module-level dataview ``dv``:
    ``up_limit`` (daily close-to-close change > 0.095) and ``down_limit``
    (change < -0.095).

    * ``can_enter`` is True where ``up_limit < 1`` and ``trade_status`` is
      not u'停牌' (suspended).
    * ``can_exit`` is True where ``down_limit < 1`` and ``trade_status`` is
      not u'停牌'.

    NOTE: calling this a second time on the same dataview makes JAQS print
    "Add formula failed: name [...] exist"; execution continues afterwards.
    """
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'
    # Limit-up flag
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False)
    # Limit-down flag
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False)
    # Not limit-up and not suspended
    can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus)
    # Not limit-down and not suspended
    can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus)
    return can_enter, can_exit
# Shared inputs reused by the later cells.
mask = mask_index_member()
can_enter, can_exit = limit_up_down()
price = dv.get_ts('close_adj')
price_bench = dv.data_benchmark
Add formula failed: name [up_limit] exist. Try another name.
Add formula failed: name [down_limit] exist. Try another name.
# Names of the dataview fields evaluated as candidate factors.
all_factors = [
    'pb',
    'roe',
    'price_div_dps',
    'ps_ttm',
    'pe_ttm',
    'roa',
]
# Raw time-series frame for each factor, keyed by factor name.
origin_factors = {factor_name: dv.get_ts(factor_name) for factor_name in all_factors}
1. 去极值,标准化,加干扰项
from jaqs.research.signaldigger import process

# Winsorize, then z-score standardize, then add a disturbance term to each factor.
factor_dict = {
    name: process.get_disturbed_factor(process.standardize(process.winsorize(frame)))
    for name, frame in origin_factors.items()
}
2. 修改名称索引
def change_columns_index(signal):
    """Return a relabelled copy of ``signal``.

    Column labels ending in ``SZ`` / ``SH`` have that trailing suffix
    replaced by ``XSHE`` / ``XSHG``; every other label is left unchanged.
    Index values are converted with ``str()`` and parsed as ``%Y%m%d`` dates.
    The input frame itself is not modified.

    Raises:
        ValueError: if an index value does not match ``%Y%m%d``.
    """
    suffix_map = {'SZ': 'XSHE', 'SH': 'XSHG'}
    new_names = {}
    for c in signal.columns:
        for old, new in suffix_map.items():
            if c.endswith(old):
                # Swap only the trailing suffix, not every occurrence in the label.
                new_names[c] = c[:-len(old)] + new
                break
    # rename(columns=...) relabels columns; passing a mapping to rename_axis
    # raises in pandas >= 1.0.
    signal = signal.rename(columns=new_names)
    signal.index = pd.to_datetime([str(x) for x in signal.index], format="%Y%m%d")
    return signal
# Relabel the 'close_adj' price frame the same way the factor frames are relabelled.
price_time = change_columns_index(price)
3. 计算IC的相关系数,并剔除高相关性的因子
def cal_daily_ic(factor_df):
    """Return the daily mean information coefficient for one factor frame.

    ``factor_df`` is stacked and passed to alphalens together with the
    module-level ``price_time`` frame, using 5 quantiles.
    """
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
        factor_df.stack(), price_time, quantiles=5)
    return alphalens.performance.mean_information_coefficient(factor_data, by_time='D')


daily_ic = {key: cal_daily_ic(change_columns_index(value)) for key, value in factor_dict.items()}
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so
# this line needs an older pandas. It is kept because later cells call
# daily_panel.minor_xs(5).
daily_panel = pd.Panel(daily_ic)
# Pairwise correlation between the factors' daily IC series, taken from the
# minor-axis slice labelled 5 (presumably the 5-period forward return — confirm).
factor_corr = daily_panel.minor_xs(5).corr()
print(factor_corr)
pb pe_ttm price_div_dps ps_ttm roa roe
pb 1.000000 0.858428 0.764783 0.913597 0.689696 0.229572
pe_ttm 0.858428 1.000000 0.823935 0.807155 0.376659 -0.169861
price_div_dps 0.764783 0.823935 1.000000 0.699770 0.320101 -0.109434
ps_ttm 0.913597 0.807155 0.699770 1.000000 0.615621 0.244025
roa 0.689696 0.376659 0.320101 0.615621 1.000000 0.789137
roe 0.229572 -0.169861 -0.109434 0.244025 0.789137 1.000000
# Mean daily IC per factor on the minor-axis slice labelled 5.
ic = daily_panel.minor_xs(5).mean()
def compare(corr, targets):
    """Yield the labels of ``targets`` that pass the ``available`` check.

    The index of ``targets`` is walked in reverse order. Each label is tested
    against the labels that follow it in that reversed order, and is yielded
    only when ``available`` returns True. The last label is always yielded
    because it has nothing left to be compared with.

    Args:
        corr: correlation frame indexed by label on both axes.
        targets: object whose ``.index`` holds the candidate labels.
    """
    index = list(reversed(targets.index))
    for position, name in enumerate(index, start=1):
        if available(corr, name, index[position:]):
            yield name
def available(corr, target, compares, threshold=0.9):
    """Return True when ``target`` is not highly correlated with any of ``compares``.

    Args:
        corr: correlation frame; ``corr.loc[target, c]`` is read for each ``c``.
        target: label being tested.
        compares: labels to test ``target`` against; may be empty.
        threshold: absolute correlation above which two labels count as too
            similar. Defaults to 0.9.

    Returns:
        False if any ``|corr.loc[target, c]|`` exceeds ``threshold``, else True.
        NaN correlations never exceed the threshold.
    """
    return not any(abs(corr.loc[target, c]) > threshold for c in compares)
# Print each factor name that compare() yields for the two highest-mean-IC factors.
for i in compare(factor_corr, ic.nlargest(2)):
    print(i)
roe
roa
# Processed factor frames for the names compare() yields from the two highest-mean-IC factors.
big_dict = {i: factor_dict[i] for i in compare(factor_corr, ic.nlargest(2))}
1. combine_factors的equal_weight(等权重加权)
from jaqs.research import multi_factor

# Combine the selected factors using the "equal_weight" method.
Equal_Portfolio = multi_factor.combine_factors(
    big_dict,
    standardize_type="z_score",
    winsorization=False,
    weighted_method="equal_weight",
    max_IR_props=None,
)
print(Equal_Portfolio.tail(1))
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ 000012.SZ 000021.SZ \
trade_date
20171222 -0.346562 -0.171129 -0.823929 -0.656348 0.081882 -0.025089
symbol 000024.SZ 000027.SZ 000031.SZ 000039.SZ ... 601998.SH \
trade_date ...
20171222 -0.411117 -0.70641 -0.22589 -0.646009 ... -0.36197
symbol 603000.SH 603160.SH 603288.SH 603699.SH 603799.SH 603833.SH \
trade_date
20171222 -1.052117 3.039816 2.988756 -0.184505 1.917662 2.297176
symbol 603858.SH 603885.SH 603993.SH
trade_date
20171222 0.39488 0.967938 -0.389438
[1 rows x 539 columns]
2. combine_factors的max_IRprops(动态加权最大化IR)
price_bench = dv.data_benchmark
max_IR_props = {
    'price': price,
    # If empty, absolute returns are computed; otherwise returns relative to the benchmark.
    'benchmark_price': price_bench,
    'period': 5,
    'mask': mask,
    'can_enter': can_enter,
    'can_exit': can_exit,
    'forward': True,
    'commission': 0.0008,
    # Covariance-matrix estimation method; "shrink" is the other option.
    "covariance_type": "simple",
    # Number of periods of IC used when computing the weights.
    "rollback_period": 30,
}
Factor_Portfolio = multi_factor.combine_factors(
    big_dict,
    standardize_type="rank",
    winsorization=False,
    weighted_method="max_IR",
    max_IR_props=max_IR_props,
)
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
# Show the last row of the combined factor frame.
print(Factor_Portfolio.tail(1))
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ 000012.SZ 000021.SZ \
trade_date
20171222 0.664193 0.671614 0.120594 0.263451 0.6141 0.593692
symbol 000024.SZ 000027.SZ 000031.SZ 000039.SZ ... 601998.SH \
trade_date ...
20171222 0.497217 0.243043 0.617811 0.304267 ... 0.649351
symbol 603000.SH 603160.SH 603288.SH 603699.SH 603799.SH 603833.SH \
trade_date
20171222 0.059369 0.96846 0.96475 0.441558 0.959184 0.942486
symbol 603858.SH 603885.SH 603993.SH
trade_date
20171222 0.61039 0.892393 0.419295
[1 rows x 539 columns]
1. JAQS绩效
import matplotlib.pyplot as plt
from jaqs.research import SignalDigger


def cal_obj(signal, name, period, quantile):
    """Run the JAQS SignalDigger analysis on ``signal`` and return the digger.

    Uses the module-level ``dv``, ``can_enter``, ``can_exit`` and ``mask``.
    The full report is written as PDF under ``hs300/<name>``.

    Args:
        signal: factor frame to analyse.
        name: sub-folder name for the report output.
        period: passed to SignalDigger as ``period``.
        quantile: passed to SignalDigger as ``n_quantiles``.
    """
    price = dv.get_ts('close_adj')
    obj = SignalDigger(output_folder="hs300/%s" % name,
                       output_format='pdf')
    obj.process_signal_before_analysis(signal,
                                       price=price,
                                       n_quantiles=quantile, period=period,
                                       # benchmark_price=price_bench,
                                       can_enter=can_enter,
                                       can_exit=can_exit,
                                       mask=mask)
    obj.create_full_report()
    return obj


def plot_pfm(signal, name, period=5, quantile=5):
    """Build the report for ``signal`` and show the resulting figures."""
    obj = cal_obj(signal, name, period, quantile)
    # Bare attribute access kept from the original cell.
    obj.fig_objs
    plt.show()


def signal_data(signal, name, period=5, quantile=5):
    """Build the report for ``signal`` and return the digger's ``signal_data``."""
    obj = cal_obj(signal, name, period, quantile)
    return obj.signal_data
# JAQS report for the combined factor with period=5 and 5 quantiles; output goes under hs300/roa_roe.
plot_pfm(Factor_Portfolio, 'roa_roe', 5, 5)
Nan Data Count (should be zero) : 0; Percentage of effective data: 51%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 0.001855 0.818182 0.124605 0.074315 81165 20.142948
2 0.128015 0.935065 0.332423 0.073199 80584 19.998759
3 0.317254 0.942486 0.522951 0.069826 80584 19.998759
4 0.484230 0.953618 0.706823 0.065806 80584 19.998759
5 0.664193 1.000000 0.881344 0.057896 80028 19.860775
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\4_Selection\JAQS_Plus\hs300\roa_roe\returns_report.pdf
Information Analysis
ic
IC Mean 0.010
IC Std. 0.182
t-stat(IC) 2.008
p-value(IC) 0.045
IC Skew -0.048
IC Kurtosis -0.267
Ann. IR 0.053
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\4_Selection\JAQS_Plus\hs300\roa_roe\information_report.pdf
<matplotlib.figure.Figure at 0x24c7449e828>


2. Alphalens绩效
import matplotlib.pyplot as plt

# Convert the index to dates first, then slice by date string. Slicing the raw
# frame with '2015-09-01' would be applied to the unconverted trade_date labels
# (e.g. 20171222), which are not dates.
factor = change_columns_index(Factor_Portfolio).loc['2015-09-01':].stack()

factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, price_time, quantiles=5)
mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(factor_data, by_date=True)

# Cumulative returns by quantile.
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
plt.show()

# IC distribution and monthly IC heatmap.
# NOTE: this rebinds the module-level name `ic` used by earlier cells.
ic = alphalens.performance.factor_information_coefficient(factor_data)
alphalens.plotting.plot_ic_hist(ic)
mean_monthly_ic = alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
plt.show()


# Raw ROE and PB frames from the dataview.
roe_df = dv.get_ts('roe')
pb_df = dv.get_ts('pb')
def largest(row, n=30):
    """Return a Series of 1s indexed by the labels of the ``n`` largest values in ``row``."""
    return pd.Series(1, index=row.nlargest(n).index)


def smallest(row, n=30):
    """Return a Series of 1s indexed by the labels of the ``n`` smallest values in ``row``."""
    # The original called nlargest here, which selected the largest values instead.
    return pd.Series(1, index=row.nsmallest(n).index)
from functools import partial

# Apply the selectors row by row (axis=1) with n=10: cells hold 1 for the
# selected labels and NaN elsewhere.
big_roe = roe_df.agg(partial(largest, n=10), axis=1)
small_pb = pb_df.agg(partial(smallest, n=10), axis=1)
PB最低的N只与ROE最高的N只的交集
# The sum is 2 only where both frames hold 1; it is NaN wherever either side is missing.
Intersection_data = big_roe+small_pb
# Keep the cells equal to 2 and relabel them as 1; every other cell is NaN.
Intersection = Intersection_data[Intersection_data==2].replace(2,1)
PB最低的N只与ROE最高的N只的并集
# Count how many of the two frames flag each cell (0, 1 or 2). add(fill_value=0)
# treats a missing side as 0, including columns that exist in only one frame;
# plain `+` would turn those columns into NaN and drop them from the union.
Union_data = big_roe.add(small_pb, fill_value=0).fillna(0)
# Flagged by at least one frame -> 1, otherwise NaN.
Union = Union_data.replace(0, np.nan)
Union[Union > 0] = 1