## octave-cvsupdate

 [Octave-cvsupdate] octave-forge/main/statistics anderson_darling_test.m,1.3,1.4 From: Paul Kienzle - 2005-09-26 07:38:57 Update of /cvsroot/octave/octave-forge/main/statistics In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8436 Modified Files: anderson_darling_test.m Log Message: Return more info; improve exponential test criteria; add docs Index: anderson_darling_test.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/statistics/anderson_darling_test.m,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- anderson_darling_test.m 2 Aug 2004 20:01:03 -0000 1.3 +++ anderson_darling_test.m 26 Sep 2005 07:38:48 -0000 1.4 @@ -1,12 +1,12 @@ -## [q,A] = anderson_darling_test(x,uniform|normal|exponential) +## [q,Asq,info] = anderson_darling_test(x,uniform|normal|exponential) ## ## Test the hypothesis that x is selected from the given distribution -## using the Anderson-Darling test. Returns a small q value if the data -## are not distributed as expected. +## using the Anderson-Darling test. If the returned q is small, reject +## the hypothesis at the q*100% level. ## -## The Anderson-Darling A statistic is calculated as follows: +## The Anderson-Darling A^2 statistic is calculated as follows: ## -## A = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1})) +## A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1})) ## ## where z_i is the ordered position of the x's in the CDF of the ## distribution. Unlike the Kolmogorov-Smirnov statistic, the @@ -17,16 +17,17 @@ ## distribution parameters from the data, convert the values ## to CDF values, and compare the result to tabluated critical ## values. This includes an correction for small n which -## works well enough for n >= 8, but less so from smaller n. +## works well enough for n >= 8, but less so from smaller n. The +## returned info.Asq_corrected contains the adjusted statistic. ## ## For 'uniform', assume the values are uniformly distributed -## in (0,1), compute A and return the corresponding p value from -## 1-anderson_darling_cdf(A,n). +## in (0,1), compute A^2 and return the corresponding p value from +## 1-anderson_darling_cdf(A^2,n). ## ## If you are selecting from a known distribution, convert your ## values into CDF values for the distribution and use 'uniform'. ## Do not use 'uniform' if the distribution parameters are estimated -## from the data itself, as this sharply biases the A statistic +## from the data itself, as this sharply biases the A^2 statistic ## toward smaller values. ## ## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in @@ -35,17 +36,21 @@ ## Author: Paul Kienzle ## This program is granted to the public domain. -function [q,A] = anderson_darling_test(x,dist) +function [q,Asq,info] = anderson_darling_test(x,dist) if size(x,1) == 1, x=x(:); end x = sort(x); n = size(x,1); use_cdf = 0; + + # Compute adjustment and critical values to use for stats. switch dist case 'normal', + # This expression for adj is used in R. + # Note that the values from NIST dataplot don't work nearly as well. adj = 1 + (.75 + 2.25/n)/n; - qvals = [ 1, 0.1, 0.05, 0.025, 0.01 ]; - Acrit = [-Inf, 0.631, 0.752, 0.873, 1.035]/adj; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 0.631, 0.752, 0.873, 1.035]; x = stdnormal_cdf(zscore(x)); case 'uniform', @@ -53,25 +58,39 @@ ## This will drive the statistic to infinity. x(x<0) = 0; x(x>1) = 1; - #qvals = [ 1, 0.1, 0.05, 0.025, 0.01 ]; - #Acrit = [-Inf, 1.933, 2.492, 3.077, 3.878]; + adj = 1.; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 1.933, 2.492, 3.070, 3.857 ]; use_cdf = 1; case 'XXXweibull', adj = 1 + 0.2/sqrt(n); - Acrit = [-Inf, 0.637, 0.757, 0.877, 1.038]*adj; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 0.637, 0.757, 0.877, 1.038]; ## XXX FIXME XXX how to fit alpha and sigma? x = weibull_cdf(x,ones(n,1)*alpha,ones(n,1)*sigma); case 'exponential', - qvals = [ 1, 0.1, 0.05, 0.025, 0.01 ]; adj = 1 + 0.6/n; - Acrit = [-Inf, 1.062, 1.321, 1.591, 1.959]/adj; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + # Critical values depend on n. Choose the appropriate critical set. + # These values come from NIST dataplot/src/dp8.f. + Acritn = [ + 0, 1.022, 1.265, 1.515, 1.888 + 11, 1.045, 1.300, 1.556, 1.927; + 21, 1.062, 1.323, 1.582, 1.945; + 51, 1.070, 1.330, 1.595, 1.951; + 101, 1.078, 1.341, 1.606, 1.957; + ]; + # FIXME: consider interpolating in the critical value table. + Acrit = Acritn(lookup(Acritn(:,1),n),2:5); lambda = 1./mean(x); # exponential parameter estimation x = exponential_cdf(x,ones(n,1)*lambda); otherwise + # FIXME consider implementing more of distributions; a number + # of them are defined in NIST dataplot/src/dp8.f. error("Anderson-Darling test for %s not implemented", dist); end @@ -80,12 +99,23 @@ endif i = [1:n]'*ones(1,size(x,2)); - A = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n; + Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n; + # Lookup adjusted critical value in the cdf (if uniform) or in the + # the critical table. if use_cdf - q = 1-anderson_darling_cdf(A, n); + q = 1-anderson_darling_cdf(Asq*adj, n); else - q = qvals(lookup(Acrit,A)); + idx = lookup([-Inf,Acrit],Asq*adj); + q = [1,qvals](idx); + endif + + if nargout > 2, + info.Asq = Asq; + info.Asq_corrected = Asq*adj; + info.Asq_critical = [100*(1-qvals); Acrit]'; + info.p = 1-q; + info.p_is_precise = use_cdf; endif