# Source code for statsmodels.stats.stattools

"""Statistical tests to be used in conjunction with the modelsNotes-----These functions have not been formally tested."""fromscipyimportstatsimportnumpyasnpfromstatsmodels.tools.sm_exceptionsimportValueWarning# TODO: these are pretty straightforward but they should be tested

def durbin_watson(resids, axis=0):
    r"""
    Calculates the Durbin-Watson statistic.

    Parameters
    ----------
    resids : array_like
        Data for which to compute the statistic, usually regression model
        residuals.
    axis : int, optional
        Axis along which to compute the statistic for multidimensional
        input. Default is 0.

    Returns
    -------
    dw : float, array_like
        The Durbin-Watson statistic.

    Notes
    -----
    The null hypothesis of the test is that there is no serial correlation.
    The Durbin-Watson test statistic is defined as:

    .. math::

       \sum_{t=2}^T((e_t - e_{t-1})^2)/\sum_{t=1}^Te_t^2

    The test statistic is approximately equal to 2*(1-r) where ``r`` is the
    sample autocorrelation of the residuals. Thus, for r == 0, indicating no
    serial correlation, the test statistic equals 2. This statistic will
    always be between 0 and 4. The closer to 0 the statistic, the more
    evidence for positive serial correlation. The closer to 4, the more
    evidence for negative serial correlation.
    """
    resids = np.asarray(resids)
    # Numerator: sum of squared first differences of the residuals
    diff_resids = np.diff(resids, 1, axis=axis)
    dw = np.sum(diff_resids**2, axis=axis) / np.sum(resids**2, axis=axis)
    return dw

def jarque_bera(resids, axis=0):
    r"""
    The Jarque-Bera test of normality.

    Parameters
    ----------
    resids : array_like
        Data to test for normality. Usually regression model residuals that
        are mean 0.
    axis : int, optional
        Axis to use if data has more than 1 dimension. Default is 0.

    Returns
    -------
    JB : {float, ndarray}
        The Jarque-Bera test statistic.
    JBpv : {float, ndarray}
        The pvalue of the test statistic.
    skew : {float, ndarray}
        Estimated skewness of the data.
    kurtosis : {float, ndarray}
        Estimated kurtosis of the data.

    Notes
    -----
    Each output returned has 1 dimension fewer than data.

    The Jarque-Bera test statistic tests the null that the data is normally
    distributed against an alternative that the data follow some other
    distribution. The test statistic is based on two moments of the data,
    the skewness, and the kurtosis, and has an asymptotic :math:`\chi^2_2`
    distribution.

    The test statistic is defined

    .. math:: JB = n(S^2/6+(K-3)^2/24)

    where n is the number of data points, S is the sample skewness, and K is
    the sample kurtosis of the data.
    """
    resids = np.asarray(resids)
    # Calculate residual skewness and kurtosis (Pearson, not excess, hence +3)
    skew = stats.skew(resids, axis=axis)
    kurtosis = 3 + stats.kurtosis(resids, axis=axis)

    # Calculate the Jarque-Bera test for normality
    n = resids.shape[axis]
    jb = (n / 6.) * (skew**2 + (1 / 4.) * (kurtosis - 3)**2)
    # Asymptotic chi-square distribution with 2 degrees of freedom
    jb_pv = stats.chi2.sf(jb, 2)

    return jb, jb_pv, skew, kurtosis

def expected_robust_kurtosis(ab=(5.0, 50.0), dg=(2.5, 25.0)):
    """
    Calculates the expected value of the robust kurtosis measures in Kim
    and White assuming the data are normally distributed.

    Parameters
    ----------
    ab : iterable, optional
        Contains 100*(alpha, beta) in the kr3 measure where alpha is the
        tail quantile cut-off for measuring the extreme tail and beta is
        the central quantile cutoff for the standardization of the measure
    dg : iterable, optional
        Contains 100*(delta, gamma) in the kr4 measure where delta is the
        tail quantile for measuring extreme values and gamma is the central
        quantile used in the standardization of the measure

    Returns
    -------
    ekr : ndarray, 4-element
        Contains the expected values of the 4 robust kurtosis measures

    Notes
    -----
    See `robust_kurtosis` for definitions of the robust kurtosis measures
    """
    alpha, beta = ab
    delta, gamma = dg
    expected_value = np.zeros(4)
    ppf = stats.norm.ppf
    pdf = stats.norm.pdf
    # Normal octiles used by the quantile-based measure kr2
    q1, q2, q3, q5, q6, q7 = ppf(np.array((1.0, 2.0, 3.0, 5.0, 6.0, 7.0)) / 8)
    expected_value[0] = 3  # standard kurtosis of the normal

    expected_value[1] = ((q7 - q5) + (q3 - q1)) / (q6 - q2)

    # kr3: ratio of expected tail exceedances at alpha vs. beta for a normal
    q_alpha, q_beta = ppf(np.array((alpha / 100.0, beta / 100.0)))
    expected_value[2] = (2 * pdf(q_alpha) / alpha) / (2 * pdf(q_beta) / beta)

    # kr4: ratio of symmetric quantile spreads at delta vs. gamma
    q_delta, q_gamma = ppf(np.array((delta / 100.0, gamma / 100.0)))
    expected_value[3] = (-2.0 * q_delta) / (-2.0 * q_gamma)

    return expected_value

def robust_kurtosis(y, axis=0, ab=(5.0, 50.0), dg=(2.5, 25.0), excess=True):
    """
    Calculates the four kurtosis measures in Kim & White

    Parameters
    ----------
    y : array_like
        Data to compute use in the estimator.
    axis : int or None, optional
        Axis along which the kurtosis are computed.  If `None`, the entire
        array is used.
    ab : iterable, optional
        Contains 100*(alpha, beta) in the kr3 measure where alpha is the
        tail quantile cut-off for measuring the extreme tail and beta is
        the central quantile cutoff for the standardization of the measure
    dg : iterable, optional
        Contains 100*(delta, gamma) in the kr4 measure where delta is the
        tail quantile for measuring extreme values and gamma is the central
        quantile used in the standardization of the measure
    excess : bool, optional
        If true (default), computed values are excess of those for a
        standard normal distribution.

    Returns
    -------
    kr1 : ndarray
          The standard kurtosis estimator.
    kr2 : ndarray
          Kurtosis estimator based on octiles.
    kr3 : ndarray
          Kurtosis estimators based on exceedance expectations.
    kr4 : ndarray
          Kurtosis measure based on the spread between high and low
          quantiles.

    Notes
    -----
    The robust kurtosis measures are defined

    .. math::

        KR_{2}=\\frac{\\left(\\hat{q}_{.875}-\\hat{q}_{.625}\\right)
        +\\left(\\hat{q}_{.375}-\\hat{q}_{.125}\\right)}
        {\\hat{q}_{.75}-\\hat{q}_{.25}}

    .. math::

        KR_{3}=\\frac{\\hat{E}\\left(y|y>\\hat{q}_{1-\\alpha}\\right)
        -\\hat{E}\\left(y|y<\\hat{q}_{\\alpha}\\right)}
        {\\hat{E}\\left(y|y>\\hat{q}_{1-\\beta}\\right)
        -\\hat{E}\\left(y|y<\\hat{q}_{\\beta}\\right)}

    .. math::

        KR_{4}=\\frac{\\hat{q}_{1-\\delta}-\\hat{q}_{\\delta}}
        {\\hat{q}_{1-\\gamma}-\\hat{q}_{\\gamma}}

    where :math:`\\hat{q}_{p}` is the estimated quantile at :math:`p`.

    .. [*] Tae-Hwan Kim and Halbert White, "On more robust estimation of
       skewness and kurtosis," Finance Research Letters, vol. 1, pp. 56-73,
       March 2004.
    """
    # Flatten when axis is None or when y is effectively 1-d but carries
    # extra singleton dimensions
    if (axis is None or
            (y.squeeze().ndim == 1 and y.ndim != 1)):
        y = y.ravel()
        axis = 0

    alpha, beta = ab
    delta, gamma = dg
    # Octiles for kr2 plus the delta/gamma quantiles for kr4, in one pass
    perc = (12.5, 25.0, 37.5, 62.5, 75.0, 87.5,
            delta, 100.0 - delta, gamma, 100.0 - gamma)
    e1, e2, e3, e5, e6, e7, fd, f1md, fg, f1mg = np.percentile(y, perc,
                                                               axis=axis)

    expected_value = (expected_robust_kurtosis(ab, dg)
                      if excess else np.zeros(4))

    kr1 = stats.kurtosis(y, axis, False) - expected_value[0]
    kr2 = ((e7 - e5) + (e3 - e1)) / (e6 - e2) - expected_value[1]
    if y.ndim == 1:
        kr3 = _kr3(y, alpha, beta)
    else:
        kr3 = np.apply_along_axis(_kr3, axis, y, alpha, beta)
    kr3 -= expected_value[2]
    kr4 = (f1md - fd) / (f1mg - fg) - expected_value[3]
    return kr1, kr2, kr3, kr4

def_medcouple_1d(y):""" Calculates the medcouple robust measure of skew. Parameters ---------- y : array_like, 1-d Data to compute use in the estimator. Returns ------- mc : float The medcouple statistic Notes ----- The current algorithm requires a O(N**2) memory allocations, and so may not work for very large arrays (N>10000). .. [*] M. Huberta and E. Vandervierenb, "An adjusted boxplot for skewed distributions" Computational Statistics & Data Analysis, vol. 52, pp. 5186-5201, August 2008. """# Parameter changes the algorithm to the slower for large ny=np.squeeze(np.asarray(y))ify.ndim!=1:raiseValueError("y must be squeezable to a 1-d array")y=np.sort(y)n=y.shape[0]ifn%2==0:mf=(y[n//2-1]+y[n//2])/2else:mf=y[(n-1)//2]z=y-mflower=z[z<=0.0]upper=z[z>=0.0]upper=upper[:,None]standardization=upper-loweris_zero=np.logical_and(lower==0.0,upper==0.0)standardization[is_zero]=np.infspread=upper+lowerh=spread/standardization# GH5395num_ties=np.sum(lower==0.0)ifnum_ties:# Replacements has -1 above the anti-diagonal, 0 on the anti-diagonal,# and 1 below the anti-diagonalreplacements=np.ones((num_ties,num_ties))-np.eye(num_ties)replacements-=2*np.triu(replacements)# Convert diagonal to anti-diagonalreplacements=np.fliplr(replacements)# Always replace upper right blockh[:num_ties,-num_ties:]=replacementsreturnnp.median(h)

def medcouple(y, axis=0):
    """
    Calculate the medcouple robust measure of skew.

    Parameters
    ----------
    y : array_like
        Data to compute use in the estimator.
    axis : {int, None}
        Axis along which the medcouple statistic is computed.  If `None`,
        the entire array is used.

    Returns
    -------
    mc : ndarray
        The medcouple statistic with the same shape as `y`, with the
        specified axis removed.

    Notes
    -----
    The current algorithm requires a O(N**2) memory allocations, and so may
    not work for very large arrays (N>10000).

    .. [*] M. Huberta and E. Vandervierenb, "An adjusted boxplot for skewed
       distributions" Computational Statistics & Data Analysis, vol. 52,
       pp. 5186-5201, August 2008.
    """
    y = np.asarray(y, dtype=np.double)  # GH 4243
    if axis is None:
        return _medcouple_1d(y.ravel())

    return np.apply_along_axis(_medcouple_1d, axis, y)