class LagOrderResults:
    """
    Results class for choosing a model's lag order.

    Parameters
    ----------
    ics : dict
        The keys are the strings ``"aic"``, ``"bic"``, ``"hqic"``, and
        ``"fpe"``. A corresponding value is a list of information criteria
        for various numbers of lags.
    selected_orders : dict
        The keys are the strings ``"aic"``, ``"bic"``, ``"hqic"``, and
        ``"fpe"``. The corresponding value is an integer specifying the
        number of lags chosen according to a given criterion (key).
    vecm : bool, default: `False`
        `True` indicates that the model is a VECM. In case of a VAR model
        this argument must be `False`.

    Notes
    -----
    In case of a VECM the shown lags are lagged differences.
    """

    def __init__(self, ics, selected_orders, vecm=False):
        model_kind = "VECM" if vecm else "VAR"
        self.title = model_kind + " Order Selection (* highlights the minimums)"
        self.ics = ics
        self.selected_orders = selected_orders
        self.vecm = vecm
        # Expose the per-criterion selections as plain attributes for
        # convenient access (e.g. ``results.aic``).
        self.aic = selected_orders["aic"]
        self.bic = selected_orders["bic"]
        self.hqic = selected_orders["hqic"]
        self.fpe = selected_orders["fpe"]

def _estimate_var(self, lags, offset=0, trend='c'):
    """
    Estimate a VAR model with the given number of lags.

    Parameters
    ----------
    lags : int
        Lags of the endogenous variable.
    offset : int
        Periods to drop from beginning -- for order selection so it's an
        apples-to-apples comparison.
    trend : {str, None}
        As per above.
    """
    # have to do this again because select_order does not call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError('offset must be >= 0')

    nobs = self.n_totobs - lags - offset
    endog = self.endog[offset:]
    exog = None if self.exog is None else self.exog[offset:]
    z = util.get_var_endog(endog, lags, trend=trend, has_constant='raise')
    if exog is not None:
        # TODO: currently only deterministic terms supported (exoglags==0)
        # and since exoglags==0, x will be an array of size 0.
        x = util.get_var_endog(exog[-nobs:], 0, trend="n",
                               has_constant="raise")
        x_inst = exog[-nobs:]
        x = np.column_stack((x, x_inst))
        del x_inst  # free memory
        # Splice the exog columns in between the trend columns and the
        # lagged-endog columns of z.
        temp_z = z
        z = np.empty((x.shape[0], x.shape[1] + z.shape[1]))
        z[:, :self.k_trend] = temp_z[:, :self.k_trend]
        z[:, self.k_trend:self.k_trend + x.shape[1]] = x
        z[:, self.k_trend + x.shape[1]:] = temp_z[:, self.k_trend:]
        del temp_z, x  # free memory

    # the following modification of z is necessary to get the same results
    # as JMulTi for the constant-term-parameter...
    for i in range(self.k_trend):
        if (np.diff(z[:, i]) == 1).all():  # modify the trend-column
            z[:, i] += lags
        # make the same adjustment for the quadratic term
        if (np.diff(np.sqrt(z[:, i])) == 1).all():
            z[:, i] = (np.sqrt(z[:, i]) + lags) ** 2

    y_sample = endog[lags:]
    # Lütkepohl p75, about 5x faster than stated formula
    params = np.linalg.lstsq(z, y_sample, rcond=1e-15)[0]
    resid = y_sample - np.dot(z, params)

    # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
    # process $u$
    # equivalent definition
    # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
    # Z^\prime) Y
    # Ref: Lütkepohl p.75
    # df_resid right now is T - Kp - 1, which is a suggested correction
    avobs = len(y_sample)
    if exog is not None:
        k_trend += exog.shape[1]
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(endog, z, params, omega, lags,
                        names=self.endog_names, trend=trend,
                        dates=self.data.dates, model=self, exog=self.exog)
    return VARResultsWrapper(varfit)

def select_order(self, maxlags=None, trend="c"):
    """
    Compute lag order selections based on each of the available
    information criteria.

    Parameters
    ----------
    maxlags : int
        if None, defaults to 12 * (nobs/100.)**(1./4)
    trend : str {"nc", "c", "ct", "ctt"}
        * "nc" - no deterministic terms
        * "c" - constant term
        * "ct" - constant and linear term
        * "ctt" - constant, linear, and quadratic term

    Returns
    -------
    selections : LagOrderResults
    """
    if maxlags is None:
        maxlags = int(round(12 * (len(self.endog) / 100.) ** (1 / 4.)))
        # TODO: This expression shows up in a bunch of places, but
        # in some it is `int` and in others `np.ceil`. Also in some
        # it multiplies by 4 instead of 12. Let's put these all in
        # one place and document when to use which variant.

    ics = defaultdict(list)
    # With neither exog nor deterministic terms a 0-lag model has nothing
    # to estimate, so start at lag 1 in that case.
    p_min = 0 if self.exog is not None or trend != "nc" else 1
    for p in range(p_min, maxlags + 1):
        # exclude some periods to same amount of data used for each lag
        # order
        result = self._estimate_var(p, offset=maxlags - p, trend=trend)

        # NOTE: modernized from the py2-compat ``iteritems`` helper to the
        # plain dict method; identical behavior on Python 3.
        for k, v in result.info_criteria.items():
            ics[k].append(v)

    selected_orders = {k: np.array(v).argmin() + p_min
                       for k, v in ics.items()}

    return LagOrderResults(ics, selected_orders, vecm=False)

def is_stable(self, verbose=False):
    """
    Determine stability of the estimated VAR based on its coefficients.

    Parameters
    ----------
    verbose : bool
        Print eigenvalues of the VAR(1) companion

    Notes
    -----
    Checks if det(I - Az) = 0 for any mod(z) <= 1, so all the
    eigenvalues of the companion matrix must lie outside the unit circle.
    """
    # Delegates to the module-level stability check on the lag
    # coefficient matrices.
    stable = is_stable(self.coefs, verbose=verbose)
    return stable

def simulate_var(self, steps=None, offset=None, seed=None):
    """
    simulate the VAR(p) process for the desired number of steps

    Parameters
    ----------
    steps : None or int
        number of observations to simulate, this includes the initial
        observations to start the autoregressive process.
        If offset is not None, then exog of the model are used if they
        were provided in the model
    offset : None or ndarray (steps, neqs)
        If not None, then offset is added as an observation specific
        intercept to the autoregression. If it is None and either trend
        (including intercept) or exog were used in the VAR model, then
        the linear predictor of those components will be used as offset.
        This should have the same number of rows as steps, and the same
        number of columns as endogenous variables (neqs).
    seed : {None, int}
        If seed is not None, then it will be used with for the random
        variables generated by numpy.random.

    Returns
    -------
    endog_simulated : nd_array
        Endog of the simulated VAR process

    Raises
    ------
    ValueError
        If `steps` conflicts with the length implied by `offset` or the
        model's exog.
    """
    steps_ = None
    if offset is None:
        if self.k_exog_user > 0 or self.k_trend > 1:
            # if more than intercept
            # endog_lagged contains all regressors, trend, exog_user
            # and lagged endog, trimmed initial observations
            offset = self.endog_lagged[:, :self.k_exog].dot(
                self.coefs_exog.T)
            steps_ = self.endog_lagged.shape[0]
        else:
            offset = self.intercept
    else:
        steps_ = offset.shape[0]

    # default, but over written if exog or offset are used
    if steps is None:
        if steps_ is None:
            steps = 1000
        else:
            steps = steps_
    else:
        if steps_ is not None and steps != steps_:
            # BUG FIX: the original implicit string concatenation was
            # missing a space ("steps mustbe equal").
            raise ValueError('if exog or offset are used, then steps must '
                             'be equal to their length or None')

    y = util.varsim(self.coefs, offset, self.sigma_u, steps=steps,
                    seed=seed)
    return y

def plotsim(self, steps=None, offset=None, seed=None):
    """
    Plot a simulation from the VAR(p) process for the desired number of
    steps.
    """
    # Simulate first, then hand the simulated endog off to the shared
    # multi-time-series plotting helper.
    simulated = self.simulate_var(steps=steps, offset=offset, seed=seed)
    return plotting.plot_mts(simulated)

def acorr(self, nlags=None):
    """
    Autocorrelation function.

    Parameters
    ----------
    nlags : int or None
        The number of lags to include in the autocovariance function. The
        default is the number of lags included in the model.

    Returns
    -------
    acorr : ndarray
        Autocorrelation and cross correlations (nlags, neqs, neqs)
    """
    # Convert the model-implied autocovariances into autocorrelations.
    acf_values = self.acf(nlags=nlags)
    return util.acf_to_acorr(acf_values)

@property
def df_model(self):
    """
    Number of estimated parameters per equation, including the
    intercept / trends.
    """
    return self.neqs * self.k_ar + self.k_exog

@property
def df_resid(self):
    """Number of observations minus number of estimated parameters."""
    return self.nobs - self.df_model

@cache_readonly
def fittedvalues(self):
    """
    The predicted insample values of the response variables of the model.
    """
    # Linear predictor: lagged-endog (plus trend/exog) design times the
    # stacked coefficient matrix.
    return np.dot(self.endog_lagged, self.params)

@cache_readonly
def resid(self):
    """
    Residuals of response variable resulting from estimated coefficients.
    """
    # The first k_ar observations are consumed as initial lags, so trim
    # them before differencing against the fitted values.
    return self.endog[self.k_ar:] - self.fittedvalues

def plot_sample_acorr(self, nlags=10, linewidth=8):
    """
    Plot sample autocorrelation function.

    Parameters
    ----------
    nlags : int
        The number of lags to use in compute the autocorrelation. Does
        not count the zero lag, which will be returned.
    linewidth : int
        The linewidth for the plots.

    Returns
    -------
    Figure
        The figure that contains the plot axes.
    """
    acorrs = self.sample_acorr(nlags=nlags)
    return plotting.plot_full_acorr(acorrs, linewidth=linewidth)

def reorder(self, order):
    """
    Reorder variables for structural specification.
    """
    if len(order) != len(self.params[0, :]):
        raise ValueError("Reorder specification length should match "
                         "number of endogenous variables")
    # This converts order to list of integers if given as strings
    if isinstance(order[0], str):
        order = [self.names.index(name) for name in order]
    return _reordered(self, order)

[docs]deftest_causality(self,caused,causing=None,kind='f',signif=0.05):""" Test Granger causality Parameters ---------- caused : int or str or sequence of int or str If int or str, test whether the variable specified via this index (int) or name (str) is Granger-caused by the variable(s) specified by `causing`. If a sequence of int or str, test whether the corresponding variables are Granger-caused by the variable(s) specified by `causing`. causing : int or str or sequence of int or str or None, default: None If int or str, test whether the variable specified via this index (int) or name (str) is Granger-causing the variable(s) specified by `caused`. If a sequence of int or str, test whether the corresponding variables are Granger-causing the variable(s) specified by `caused`. If None, `causing` is assumed to be the complement of `caused`. kind : {'f', 'wald'} Perform F-test or Wald (chi-sq) test signif : float, default 5% Significance level for computing critical values for test, defaulting to standard 0.05 level Notes ----- Null hypothesis is that there is no Granger-causality for the indicated variables. The degrees of freedom in the F-test are based on the number of variables in the VAR system, that is, degrees of freedom are equal to the number of equations in the VAR times degree of freedom of a single equation. Test for Granger-causality as described in chapter 7.6.3 of [1]_. Test H0: "`causing` does not Granger-cause the remaining variables of the system" against H1: "`causing` is Granger-causal for the remaining variables". Returns ------- results : CausalityTestResults References ---------- .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer. 
"""ifnot(0<signif<1):raiseValueError("signif has to be between 0 and 1")allowed_types=(str,int)ifisinstance(caused,allowed_types):caused=[caused]ifnotall(isinstance(c,allowed_types)forcincaused):raiseTypeError("caused has to be of type string or int (or a ""sequence of these types).")caused=[self.names[c]iftype(c)==intelsecforcincaused]caused_ind=[util.get_index(self.names,c)forcincaused]ifcausingisnotNone:ifisinstance(causing,allowed_types):causing=[causing]ifnotall(isinstance(c,allowed_types)forcincausing):raiseTypeError("causing has to be of type string or int (or ""a sequence of these types) or None.")causing=[self.names[c]iftype(c)==intelsecforcincausing]causing_ind=[util.get_index(self.names,c)forcincausing]ifcausingisNone:causing_ind=[iforiinrange(self.neqs)ifinotincaused_ind]causing=[self.names[c]forcincaused_ind]k,p=self.neqs,self.k_arifp==0:err="Cannot test Granger Causality in a model with 0 lags."raiseRuntimeError(err)# number of restrictionsnum_restr=len(causing)*len(caused)*pnum_det_terms=self.k_exog# Make restriction matrixC=np.zeros((num_restr,k*num_det_terms+k**2*p),dtype=float)cols_det=k*num_det_termsrow=0forjinrange(p):foring_indincausing_ind:fored_indincaused_ind:C[row,cols_det+ed_ind+k*ing_ind+k**2*j]=1row+=1# Lütkepohl 3.6.5Cb=np.dot(C,vec(self.params.T))middle=np.linalg.inv(C@self.cov_params()@C.T)# wald statisticlam_wald=statistic=Cb@middle@Cbifkind.lower()=='wald':df=num_restrdist=stats.chi2(df)elifkind.lower()=='f':statistic=lam_wald/num_restrdf=(num_restr,k*self.df_resid)dist=stats.f(*df)else:raiseValueError('kind %s not recognized'%kind)pvalue=dist.sf(statistic)crit_value=dist.ppf(1-signif)returnCausalityTestResults(causing,caused,statistic,crit_value,pvalue,df,signif,test="granger",method=kind)

[docs]deftest_inst_causality(self,causing,signif=0.05):""" Test for instantaneous causality Parameters ---------- causing : If int or str, test whether the corresponding variable is causing the variable(s) specified in caused. If sequence of int or str, test whether the corresponding variables are causing the variable(s) specified in caused. signif : float between 0 and 1, default 5 % Significance level for computing critical values for test, defaulting to standard 0.05 level verbose : bool If True, print a table with the results. Returns ------- results : dict A dict holding the test's results. The dict's keys are: "statistic" : float The calculated test statistic. "crit_value" : float The critical value of the Chi^2-distribution. "pvalue" : float The p-value corresponding to the test statistic. "df" : float The degrees of freedom of the Chi^2-distribution. "conclusion" : str {"reject", "fail to reject"} Whether H0 can be rejected or not. "signif" : float Significance level Notes ----- Test for instantaneous causality as described in chapters 3.6.3 and 7.6.4 of [1]_. Test H0: "No instantaneous causality between caused and causing" against H1: "Instantaneous causality between caused and causing exists". Instantaneous causality is a symmetric relation (i.e. if causing is "instantaneously causing" caused, then also caused is "instantaneously causing" causing), thus the naming of the parameters (which is chosen to be in accordance with test_granger_causality()) may be misleading. This method is not returning the same result as JMulTi. This is because the test is based on a VAR(k_ar) model in statsmodels (in accordance to pp. 104, 320-321 in [1]_) whereas JMulTi seems to be using a VAR(k_ar+1) model. References ---------- .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer. 
"""ifnot(0<signif<1):raiseValueError("signif has to be between 0 and 1")allowed_types=(str,int)ifisinstance(causing,allowed_types):causing=[causing]ifnotall(isinstance(c,allowed_types)forcincausing):raiseTypeError("causing has to be of type string or int (or a "+"a sequence of these types).")causing=[self.names[c]iftype(c)==intelsecforcincausing]causing_ind=[util.get_index(self.names,c)forcincausing]caused_ind=[iforiinrange(self.neqs)ifinotincausing_ind]caused=[self.names[c]forcincaused_ind]# Note: JMulTi seems to be using k_ar+1 instead of k_ark,t,p=self.neqs,self.nobs,self.k_arnum_restr=len(causing)*len(caused)# called N in Lütkepohlsigma_u=self.sigma_uvech_sigma_u=util.vech(sigma_u)sig_mask=np.zeros(sigma_u.shape)# set =1 twice to ensure, that all the ones needed are below the main# diagonal:sig_mask[causing_ind,caused_ind]=1sig_mask[caused_ind,causing_ind]=1vech_sig_mask=util.vech(sig_mask)inds=np.nonzero(vech_sig_mask)[0]# Make restriction matrixC=np.zeros((num_restr,len(vech_sigma_u)),dtype=float)forrowinrange(num_restr):C[row,inds[row]]=1Cs=np.dot(C,vech_sigma_u)d=np.linalg.pinv(duplication_matrix(k))Cd=np.dot(C,d)middle=np.linalg.inv(Cd@np.kron(sigma_u,sigma_u)@Cd.T)/2wald_statistic=t*(Cs.T@middle@Cs)df=num_restrdist=stats.chi2(df)pvalue=dist.sf(wald_statistic)crit_value=dist.ppf(1-signif)returnCausalityTestResults(causing,caused,wald_statistic,crit_value,pvalue,df,signif,test="inst",method="wald")

def summary(self):
    """Print a decomposition table for each equation in the system."""
    out = StringIO()
    lag_labels = lrange(self.periods)
    for eq in range(self.neqs):
        table = output.pprint_matrix(self.decomp[eq], lag_labels,
                                     self.names)
        out.write('FEVD for %s\n' % self.names[eq])
        out.write(table + '\n')
    print(out.getvalue())

def plot(self, periods=None, figsize=(10, 10), **plot_kwds):
    """Plot graphical display of FEVD

    Parameters
    ----------
    periods : int, default None
        Defaults to number originally specified. Can be at most that
        number
    """
    import matplotlib.pyplot as plt

    k = self.neqs
    periods = periods or self.periods

    fig, axes = plt.subplots(nrows=k, figsize=figsize)
    fig.suptitle('Forecast error variance decomposition (FEVD)')

    # Grayscale colors spread evenly over the k response variables.
    colors = [str(c) for c in np.arange(k, dtype=float) / k]
    ticks = np.arange(periods)

    # Cumulate along the component axis so each bar can be stacked on
    # top of the previous components.
    limits = self.decomp.cumsum(2)

    ax = None
    for eq in range(k):
        ax = axes[eq]
        this_limits = limits[eq].T

        handles = []
        for comp in range(k):
            lower = this_limits[comp - 1] if comp > 0 else 0
            upper = this_limits[comp]
            handle = ax.bar(ticks, upper - lower, bottom=lower,
                            color=colors[comp], label=self.names[comp],
                            **plot_kwds)
            handles.append(handle)

        ax.set_title(self.names[eq])

    # just use the last axis to get handles for plotting
    handles, labels = ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    plotting.adjust_subplots(right=0.85)
    return fig