from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.classifiers import Classifier
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume, SimpleMovingAverage
from quantopian.pipeline.filters import StaticAssets, Q500US, Q1500US, Q3000US, QTradableStocksUS
from quantopian.pipeline.filters.fundamentals import IsPrimaryShare
from quantopian.pipeline.classifiers.fundamentals import Sector
from quantopian.pipeline.data.builtin import USEquityPricing

import alphalens
import math
import datetime
import numpy as np
import pandas as pd

## Helper functions

def high_volume_universe(top_liquid, min_price=None, min_volume=None):
    """
    Computes a security universe of liquid stocks, filtering out
    hard-to-trade ones.
    Returns
    -------
    high_volume_tradable - zipline.pipeline.filter
    """
    if top_liquid == 'QTradableStocksUS':
        universe = QTradableStocksUS()
    elif top_liquid == 500:
        universe = Q500US()
    elif top_liquid == 1500:
        universe = Q1500US()
    elif top_liquid == 3000:
        universe = Q3000US()
    else:
        universe = filters.make_us_equity_universe(
            target_size=top_liquid,
            rankby=factors.AverageDollarVolume(window_length=200),
            mask=filters.default_us_equity_universe_mask(),
            groupby=Sector(),
            max_group_weight=0.3,
            smoothing_func=lambda f: f.downsample('month_start'),
        )

    if min_price is not None:
        price = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=21, mask=universe)
        universe &= (price >= min_price)

    if min_volume is not None:
        volume = SimpleMovingAverage(inputs=[USEquityPricing.volume],
                                     window_length=21, mask=universe)
        universe &= (volume >= min_volume)

    return universe


def run_pipeline_chunks(pipe, start_date, end_date, chunks_len=None):
    """
    Drop-in replacement for run_pipeline. run_pipeline fails over very long
    date ranges (memory usage), so we split the range into chunks, run the
    pipeline on each chunk, and concatenate the results.
    """
    chunks = []
    current = pd.Timestamp(start_date)
    end = pd.Timestamp(end_date)
    step = pd.Timedelta(weeks=120) if chunks_len is None else chunks_len

    while current <= end:
        current_end = current + step
        if current_end > end:
            current_end = end

        print 'Running pipeline:', current, ' - ', current_end
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"),
                               current_end.strftime("%Y-%m-%d"))
        chunks.append(results)

        # The pipeline returns more days than requested if the end date is not
        # a trading day, so take the last date from the results
        current_end = results.index.get_level_values(0)[-1].tz_localize(None)
        current = current_end + pd.Timedelta(days=1)

    return pd.concat(chunks)


def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1',
                             factor_name='factor', top_liquid=500,
                             sector_column=None):
    """
    Creates a DataFrame containing daily factor values and sector codes for a
    liquidity-constrained universe. The returned DataFrame can be used in the
    factor tear sheet.
"""ok_universe=high_volume_universe(top_liquid)pattern_code,pattern_lag=factor_cls(mask=ok_universe)sector=Sector(mask=ok_universe)pipe=Pipeline()pipe.add(pattern_code,factor_name)pipe.add(pattern_lag,'lag')ifsector_columnisnotNone:# this is very slow toopipe.add(sector,sector_column)pipe.set_screen(ok_universe)daily_factor=run_pipeline_chunks(pipe,start_date=start_date,end_date=end_date)#daily_factor = run_pipeline(pipe, start_date=start_date, end_date=end_date, chunksize=250)returndaily_factor.dropna()defget_daily_price(sid_universe,start_date,end_date,extra_days_before=0,extra_days_after=0):""" Creates a DataFrame containing daily percentage returns and price """extra_days=math.ceil(extra_days_before*365.0/252.0)+3# just to be surestart_date=datetime.datetime.strptime(start_date,"%Y-%m-%d")-datetime.timedelta(days=extra_days)start_date=start_date.strftime("%Y-%m-%d")extra_days=math.ceil(extra_days_after*365.0/252.0)+3# just to be sureend_date=datetime.datetime.strptime(end_date,"%Y-%m-%d")+datetime.timedelta(days=extra_days)end_date=end_date.strftime("%Y-%m-%d")pricing=get_pricing(sid_universe,start_date=start_date,end_date=end_date,fields='open_price')returnpricing

from __future__ import division
from statsmodels.nonparametric.kernel_regression import KernelReg
from numpy import linspace
from scipy.signal import argrelextrema
from collections import defaultdict
import scipy.stats as stats


def _beta(stock_prices, bench_prices):
    # `linregress` returns its results in the following order:
    # slope, intercept, r-value, p-value, stderr
    regr_results = stats.linregress(y=stock_prices, x=bench_prices)
    #alpha = regr_results[1]
    beta = regr_results[0]
    #r_value = regr_results[2]
    p_value = regr_results[3]
    #stderr = regr_results[4]

    # Check null hypothesis
    if p_value > 0.05:
        beta = 0.

    return beta


def find_max_min(prices):
    prices_ = prices.copy()
    prices_.index = linspace(1., len(prices_), len(prices_))

    kr = KernelReg([prices_.values], [prices_.index.values], var_type='c', bw=[1.8, 1])
    f = kr.fit([prices_.index.values])
    smooth_prices = pd.Series(data=f[0], index=prices.index)

    local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    local_min = argrelextrema(smooth_prices.values, np.less)[0]

    price_local_max_dt = []
    for i in local_max:
        if (i > 1) and (i < len(prices) - 1):
            price_local_max_dt.append(prices.iloc[i - 2:i + 2].argmax())

    price_local_min_dt = []
    for i in local_min:
        if (i > 1) and (i < len(prices) - 1):
            price_local_min_dt.append(prices.iloc[i - 2:i + 2].argmin())

    prices.name = 'price'
    maxima = pd.DataFrame(prices.loc[price_local_max_dt])
    minima = pd.DataFrame(prices.loc[price_local_min_dt])
    max_min = pd.concat([maxima, minima]).sort_index()
    max_min.index.name = 'date'
    max_min = max_min.reset_index()
    max_min = max_min[~max_min.date.duplicated()]
    p = prices.reset_index()
    max_min['day_num'] = p[p['index'].isin(max_min.date)].index.values
    max_min = max_min.set_index('day_num').price

    return max_min


def find_patterns(max_min):
    patterns = defaultdict(list)

    for i in range(5, len(max_min) + 1):
        window = max_min.iloc[i - 5:i]

        # pattern must play out in less than 36 days
        if window.index[-1] - window.index[0] > 35:
            continue

        # Using the notation from the paper to avoid mistakes
        e1 = window.iloc[0]
        e2 = window.iloc[1]
        e3 = window.iloc[2]
        e4 = window.iloc[3]
        e5 = window.iloc[4]

        rtop_g1 = np.mean([e1, e3, e5])
        rtop_g2 = np.mean([e2, e4])

        # Head and Shoulders
        if (e1 > e2) and (e3 > e1) and (e3 > e5) and \
           (abs(e1 - e5) <= 0.03 * np.mean([e1, e5])) and \
           (abs(e2 - e4) <= 0.03 * np.mean([e1, e5])):
            patterns['HS'].append((window.index[0], window.index[-1]))

        # Inverse Head and Shoulders
        elif (e1 < e2) and (e3 < e1) and (e3 < e5) and \
             (abs(e1 - e5) <= 0.03 * np.mean([e1, e5])) and \
             (abs(e2 - e4) <= 0.03 * np.mean([e1, e5])):
            patterns['IHS'].append((window.index[0], window.index[-1]))

        # Broadening Top
        elif (e1 > e2) and (e1 < e3) and (e3 < e5) and (e2 > e4):
            patterns['BTOP'].append((window.index[0], window.index[-1]))

        # Broadening Bottom
        elif (e1 < e2) and (e1 > e3) and (e3 > e5) and (e2 < e4):
            patterns['BBOT'].append((window.index[0], window.index[-1]))

        # Triangle Top
        elif (e1 > e2) and (e1 > e3) and (e3 > e5) and (e2 < e4):
            patterns['TTOP'].append((window.index[0], window.index[-1]))

        # Triangle Bottom
        elif (e1 < e2) and (e1 < e3) and (e3 < e5) and (e2 > e4):
            patterns['TBOT'].append((window.index[0], window.index[-1]))

        # Rectangle Top
        elif (e1 > e2) and (abs(e1 - rtop_g1) / rtop_g1 < 0.0075) and \
             (abs(e3 - rtop_g1) / rtop_g1 < 0.0075) and (abs(e5 - rtop_g1) / rtop_g1 < 0.0075) and \
             (abs(e2 - rtop_g2) / rtop_g2 < 0.0075) and (abs(e4 - rtop_g2) / rtop_g2 < 0.0075) and \
             (min(e1, e3, e5) > max(e2, e4)):
            patterns['RTOP'].append((window.index[0], window.index[-1]))

        # Rectangle Bottom
        elif (e1 < e2) and (abs(e1 - rtop_g1) / rtop_g1 < 0.0075) and \
             (abs(e3 - rtop_g1) / rtop_g1 < 0.0075) and (abs(e5 - rtop_g1) / rtop_g1 < 0.0075) and \
             (abs(e2 - rtop_g2) / rtop_g2 < 0.0075) and (abs(e4 - rtop_g2) / rtop_g2 < 0.0075) and \
             (max(e1, e3, e5) > min(e2, e4)):
            patterns['RBOT'].append((window.index[0], window.index[-1]))

    return patterns


def _pattern_identification(prices, indentification_lag):
    max_min = find_max_min(prices)

    # We are only interested in the last pattern (if multiple patterns are
    # present), and the last min/max must have happened less than
    # "indentification_lag" days ago, otherwise it must have already been
    # identified or it is too late to be useful
    max_min_last_window = None
    last_max_min = None
    for i in reversed(range(len(max_min))):
        days_ago = prices.index[-1] - max_min.index[i]
        if days_ago <= indentification_lag:
            last_max_min = days_ago
            max_min_last_window = max_min.iloc[i - 4:i + 1]
            break

    if max_min_last_window is None:
        return pd.Series({'code': np.nan, 'lag': np.nan})

    # possibly identify a pattern in the selected window
    patterns = find_patterns(max_min_last_window)
    if len(patterns) != 1:
        return pd.Series({'code': np.nan, 'lag': np.nan})

    name, start_end_day_nums = patterns.iteritems().next()

    pattern_code = {
        'HS':   20,
        'IHS':  2,
        'BTOP': 10,
        'BBOT': 1,
        'TTOP': 40,
        'TBOT': 4,
        'RTOP': 30,
        'RBOT': 3,
    }

    return pd.Series({'code': pattern_code[name], 'lag': last_max_min})


class PatternFactor(CustomFactor):
    params = ('indentification_lag',)
    inputs = [USEquityPricing.close]
    window_length = 40
    outputs = ['codes', 'lags']

    def compute(self, today, assets, out, close, indentification_lag):
        prices = pd.DataFrame(close, columns=assets)
        tmp = prices.apply(_pattern_identification, args=(indentification_lag,))
        out.codes[:] = tmp.loc['code']
        out.lags[:] = tmp.loc['lag']


class PatternFactorBE(CustomFactor):
    params = ('indentification_lag',)
    inputs = [USEquityPricing.close]
    window_length = 40
    market_sid = symbols('SPY').sid

    def compute(self, today, assets, out, close, indentification_lag):
        delta_days = 1
        market_sid = self.market_sid

        returns = (close[delta_days:] / close[:-delta_days]) - 1
        market_idx = assets.get_loc(market_sid)
        market_returns = returns[:, market_idx]
        betas = np.apply_along_axis(_beta, 0, returns, market_returns)
        returns -= (returns[:, [market_idx]] * betas)  # remove returns due to beta

        daily_ret = pd.DataFrame(returns, columns=assets).fillna(0.)
        cum_returns = (daily_ret + 1).cumprod() - 1  # daily percent returns -> cumulative percent returns
        out[:] = cum_returns.apply(_pattern_identification, args=(indentification_lag,))
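
#
# Usage sketch, kept commented out: wiring PatternFactor into
# construct_factor_history above. The identification lag of 5 days and the
# dates are illustrative values, not calibrated ones.
#
# factor_cls = lambda mask: PatternFactor(mask=mask, indentification_lag=5)
# # The two outputs ('codes', 'lags') unpack in order inside
# # construct_factor_history: pattern_code, pattern_lag = factor_cls(mask=...)
# daily_factor = construct_factor_history(factor_cls,
#                                         start_date='2015-10-01',
#                                         end_date='2016-02-01',
#                                         factor_name='factor',
#                                         top_liquid=500)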

factor_name = 'factor'
start_date = '2003-01-01'
end_date = '2018-01-01'
top_liquid = 500
filter_universe = True     # very slow, filters out untradable stocks
show_sector_plots = False  # very slow to load the sector column in pipeline

# alphalens specific
periods = (1, 2, 3, 4, 5, 6, 10)
quantiles = None
bins = [-100, 100]
avgretplot = (36, 25)  # use None to avoid plotting or (days_before, days_after)
filter_zscore = None
long_short = False

prices_cache = None  # this saves lots of time when running the tear sheet multiple times
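
#
# Sketch of how these settings feed alphalens, kept commented out. It assumes
# daily_factor and prices_cache have been populated by construct_factor_history
# and get_daily_price above; note that the second argument of
# create_event_returns_tear_sheet is prices or returns depending on the
# alphalens version.
#
# factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
#     factor=daily_factor[factor_name],
#     prices=prices_cache,
#     quantiles=quantiles,
#     bins=bins,
#     periods=periods,
#     filter_zscore=filter_zscore)
# alphalens.tears.create_full_tear_sheet(factor_data, long_short=long_short)
# if avgretplot is not None:
#     alphalens.tears.create_event_returns_tear_sheet(
#         factor_data, prices_cache, avgretplot=avgretplot, long_short=long_short)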