IC序列阈值选股
- 读取本地数据
- 获取每日IC的DataFrame
- 设置IC阈值
- 选出因子值最大的n只股票
- 位移与转换格式
- 回测绩效
1_读取本地数据
# Notebook setup: imports, warning suppression, and loading the local dataview.
import warnings
from datetime import datetime

import alphalens
import numpy as np
import pandas as pd
from jaqs.data import DataView
from jaqs.data.dataapi import DataApi

# Silence library warnings so the cell output stays readable.
warnings.filterwarnings("ignore")

# Folder holding the previously saved dataview.
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_index(signal):
    """Convert the YYYYMMDD index labels of ``signal`` to datetimes, in place.

    Args:
        signal: pandas object whose index labels render (via ``str``) as
            ``YYYYMMDD``, e.g. the integer ``20120104``.

    Returns:
        The same ``signal`` object, now carrying an unnamed ``DatetimeIndex``.

    Raises:
        ValueError: if a label does not match the ``%Y%m%d`` format.
    """
    labels = signal.index.astype(str)
    # Parse all labels in one vectorized call instead of one strptime per label.
    # The name is dropped so the result matches a freshly built, unnamed index.
    signal.index = pd.to_datetime(labels, format="%Y%m%d").rename(None)
    return signal
# Names of the factors whose daily IC is evaluated below.
big_factor = ['roe','roa']
# One frame per factor (index converted by change_index), bundled into a Panel keyed by factor name.
# NOTE(review): pd.Panel was removed in pandas 0.25 — this cell needs an older pandas; confirm the target environment.
PN = pd.Panel({name: change_index(dv.get_ts(name)) for name in big_factor})
# Price frame read from the 'close_adj' field, with the same index conversion applied.
prices = change_index(dv.get_ts('close_adj'))
2_获取每日IC的DataFrame
def cal_daily_ic(factor_df):
    """Compute the mean information coefficient of one factor, grouped by 'D'.

    Args:
        factor_df: factor values; it is stacked into a single series before
            being handed to alphalens.

    Returns:
        Whatever ``alphalens.performance.mean_information_coefficient`` returns
        for the cleaned factor data (5 quantiles, a single forward-return
        period of 5) built against the module-level ``prices`` frame.
    """
    stacked_factor = factor_df.stack()
    cleaned = alphalens.utils.get_clean_factor_and_forward_returns(
        stacked_factor,
        prices,
        quantiles=5,
        periods=(5,),
    )
    ic_table = alphalens.performance.mean_information_coefficient(cleaned, by_time='D')
    return ic_table


# Daily IC table for every factor stored in the Panel, keyed by factor name.
daily_ic = dict(
    (factor_name, cal_daily_ic(factor_frame))
    for factor_name, factor_frame in PN.iteritems()
)
# For every factor, take the column labelled 5 of its daily-IC table (5 is the single
# forward-return period requested in cal_daily_ic) and put the columns side by side.
ic_df = pd.DataFrame({factor: daily_ic.get(factor)[5] for factor in big_factor})
# Show the first rows as a quick sanity check.
print(ic_df.head())
roa roe
date
2012-01-04 -0.022573 0.095570
2012-01-05 -0.012033 -0.010591
2012-01-06 -0.028648 0.082105
2012-01-09 -0.138597 0.084739
2012-01-10 -0.177690 0.068066
# Relabel the datetime index as YYYYMMDD integers; these labels are later passed to
# dv.get_ts as end_date by big_select.
# DataFrame.rename is used because rename_axis no longer accepts a label-mapping
# function (deprecated in pandas 0.21, an error from pandas 1.0 on).
df_ic = ic_df.rename(index=lambda s: s.year*10000+s.month*100+s.day)
3_设置IC阈值
import numpy as np


def cal_big(big_ic, limit_ic=0.02):
    """Turn an IC table into a 1/NaN mask of entries that reach the threshold.

    Args:
        big_ic: table of IC values; it is not modified.
        limit_ic: threshold; entries with ``value >= limit_ic`` are kept.

    Returns:
        A new object shaped like ``big_ic`` holding 1 where the IC is at least
        ``limit_ic`` and NaN everywhere else (including where the input is NaN).
    """
    # Compute the comparison once; NaN compares False and therefore stays NaN.
    reaches_limit = big_ic >= limit_ic
    return big_ic.where(reaches_limit).mask(reaches_limit, 1)
4_选出因子值最大的n只股票
def big_select(rows, n=10):
    """Pick, for every active factor of one row, the n codes with the largest values.

    Args:
        rows: ``(label, series)`` pair as produced by ``DataFrame.iterrows``;
            the label is passed to ``dv.get_ts`` as ``end_date`` and the
            non-NaN entries of the series name the factors to use.
        n: number of codes taken per factor.

    Returns:
        dict mapping every selected code to 1; empty when no factor is active.
    """
    time, row = rows
    active_factors = row.dropna().index
    dct = {}
    if len(active_factors) == 0:
        return dct
    # Last available cross-section of each active factor up to ``time``.
    target = pd.DataFrame({
        factor: dv.get_ts(factor, end_date=time).iloc[-1].dropna()
        for factor in active_factors
    })
    # Keep the n largest values per factor; nlargest skips the NaN entries
    # introduced when the per-factor series are aligned into one frame.
    # .items() is used because DataFrame.iteritems was removed in pandas 2.0.
    for _, scores in target.items():
        for code in scores.nlargest(n).index:
            dct[code] = 1
    return dct
5_位移与转换格式
# Run big_select on every (label, row) pair of the thresholded IC table; each call returns
# a {code: 1} dict and the list of dicts becomes one row per pair.
# The rows are then labelled with PN.major_axis[5:], i.e. the Panel dates minus the first five.
# NOTE(review): presumably this lines each selection up with the end of the 5-period
# forward-return window, and it requires len(df_ic) == len(PN.major_axis) - 5 — confirm.
select_big = pd.DataFrame(list(map(big_select, cal_big(df_ic).iterrows())), index=PN.major_axis[5:])
from datetime import timedelta


def change_columns_time(signal):
    """Rename exchange suffixes in the columns and move the index to 15:00.

    Column names ending in ``SZ`` get that suffix replaced by ``XSHE`` and
    names ending in ``SH`` get it replaced by ``XSHG``; other columns are
    left untouched. Every index label is shifted forward by 15 hours.

    Args:
        signal: DataFrame with string column names and an index whose labels
            support adding a ``timedelta``.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    suffix_map = (('SZ', 'XSHE'), ('SH', 'XSHG'))
    new_names = {}
    for c in signal.columns:
        for old_suffix, new_suffix in suffix_map:
            if c.endswith(old_suffix):
                # Swap only the trailing suffix, never an earlier occurrence.
                new_names[c] = c[:-len(old_suffix)] + new_suffix
                break
    # rename(columns=...) is the supported way to map labels; rename_axis with a
    # dict mapper raises on pandas >= 1.0.
    signal = signal.rename(columns=new_names)
    shift = timedelta(hours=15)
    signal.index = pd.Index([stamp + shift for stamp in signal.index])
    return signal
# Apply change_columns_time to the selection table (suffix renaming and 15-hour index shift);
# the result is what the backtest's init() stores as context.codes.
big_df = change_columns_time(select_big)
6_回测绩效
import numpy as np
import talib as ta
import pandas as pd
import rqalpha
from rqalpha.api import *


def init(context):
    """Store the selection table on the context and schedule the daily pool refresh."""
    context.codes = big_df
    context.stocks = []
    # Alternative schedule kept from the original notebook (disabled):
    # scheduler.run_weekly(find_pool, tradingday=1)
    scheduler.run_daily(find_pool)


def find_pool(context, bar_dict):
    """Set context.stocks to the codes flagged with 1 in the row for context.now.

    When the selection table has no row for ``context.now`` the previous pool
    is left unchanged.
    """
    try:
        codes = context.codes.loc[context.now]
    except KeyError:
        return
    stocks = codes.index[codes == 1]
    context.stocks = stocks


def handle_bar(context, bar_dict):
    """Per-bar entry point: delegate to buy()."""
    buy(context, bar_dict)


def buy(context, bar_dict):
    """Sell positions that left the pool and open equal-weight positions in new entries."""
    pool = context.stocks
    if pool is None:
        return
    stocks_len = len(pool)
    # Close every held position that is no longer in the pool.
    for held in context.portfolio.positions:
        if held not in pool:
            order_target_percent(held, 0)
    # Pool members that are not held yet, without duplicates, in pool order.
    result = []
    for code in pool:
        if code not in result and code not in context.portfolio.positions:
            result.append(code)
    # Each new entry targets 1/len(pool) of the portfolio; the loop body never
    # runs for an empty pool, so the division is safe.
    for code in result:
        order_target_percent(code, 1.0 / stocks_len)


config = {
    "base": {
        "start_date": "2015-09-01",
        "end_date": "2017-12-22",
        "accounts": {'stock': 1000000},
        "benchmark": "000300.XSHG"
    },
    "extra": {
        "log_level": "error",
    },
    "mod": {
        "sys_analyser": {
            "enabled": True,
            "plot": True
        }
    }
}

rqalpha.run_func(init=init, handle_bar=handle_bar, config=config)
