Commit [e9be3c] default

rewritten regularization smoothing functions

jjstickel 2008-08-21

removed inst/tkrgdatasmscat.m
removed inst/tkrgdatasmscatwrap.m
removed inst/tkrgscatdatasmooth.m
copied inst/tkrgdatasmdd.m -> inst/rgdtsmcore.m
copied inst/tkrgdatasmddwrap.m -> inst/rgdtsmcorewrap.m
copied inst/tkrgdatasmooth.m -> inst/regdatasmooth.m
inst/tkrgdatasmscat.m
File was removed.
inst/tkrgdatasmscatwrap.m
File was removed.
inst/tkrgscatdatasmooth.m
File was removed.
inst/tkrgdatasmdd.m to inst/rgdtsmcore.m
--- a/inst/tkrgdatasmdd.m
+++ b/inst/rgdtsmcore.m
@@ -13,80 +13,147 @@
 ## along with this program; If not, see <http://www.gnu.org/licenses/>. 
 
 ## -*- texinfo -*-
-##@deftypefn {Function File} {@var{ys} =} tkrgdatasmdd (@var{x}, @var{y}, @var{lambda})
-##@deftypefnx {Function File} {@var{ys} =} tkrgdatasmdd (@var{x}, @var{y}, @var{lambda}, @var{o})
-##@deftypefnx {Funciton File} {[@var{ys}, @var{v}]} = tkrgdatasmdd (...)
+##@deftypefn {Function File} {[@var{xhat}, @var{yhat}] =} rgdtsmcore (@var{x}, @var{y}, @var{d}, @var{lambda}, [@var{options}])
 ##
-## Smooths the @var{y} vs. @var{x} data values by Tikhonov
-## regularization and divided differences (arbitrary spacing of
-## @var{x}). Although this function can be used directly, the more
-## feature rich function "tkrgdatasmooth" should be used instead
-## @itemize @w
-## @item Input:
-## @itemize @w
-##   @item @var{x}:      data series of sampling positions (must be increasing)
-##   @item @var{y}:      data series, assumed to be sampled at equal intervals
-##   @item @var{lambda}: smoothing parameter; large lambda gives smoother result
-##   @item @var{o}:      order of smoothing derivative
-##  @end itemize
-## @item Output:
-##  @itemize @w
-##   @item @var{ys}:     smoothed data
-##   @item @var{v}:      generalized cross-validation variance
-## @end itemize
-## @end itemize
+## Smooths @var{y} vs. @var{x} values by Tikhonov regularization.
+## Although this function can be used directly, the more feature-rich
+## function "regdatasmooth" should be used instead.  In addition to
+## @var{x} and @var{y}, required input includes the smoothing derivative
+## @var{d} and the regularization parameter @var{lambda}.  The smooth
+## curve is returned as @var{xhat}, @var{yhat}.  The generalized
+## cross-validation variance @var{v} may also be returned.
+##
+## Currently supported input options are (multiple options are allowed):
+##
+##@table @code
+## @item "gridx"
+##  use an equally spaced @var{xhat} vector for the smooth curve rather
+##  than @var{x}; this option is implied if either the "Nhat" or "range"
+##  options are used
+## @item "Nhat", @var{value}
+##  number of equally spaced smoothed points (default = 100)
+## @item "range", @var{value}
+##  two element vector [min,max] giving the desired output range of
+##  @var{xhat}; if not provided or if the provided range is less than
+##  the range of the data, the default is the min and max of @var{x}
+## @item "relative"
+##  use relative differences for the goodness-of-fit term
+## @item "midpointrule"
+##  use the midpoint rule for the integration terms rather than a direct sum
+##@end table
 ##
 ## References:  Anal. Chem. (2003) 75, 3631; AIChE J. (2006) 52, 325
-## @seealso{tkrgdatasmooth, tkrgdatasmscat}
 ##@end deftypefn
 
-## Addapted with permission from 'whitsmdd', Paul Eilers, 2003
 
+function [xhat, yhat, v] = rgdtsmcore (x, y, d, lambda, varargin)
 
-
-function [ys, v] = tkrgdatasmdd(x, y, lambda, d)
+  ## options:  gridx, Nhat, range, relative, midpointrule
+  ## add an option to allow an arbitrary, monotonic xhat to be provided?
+  ## Defaults if not provided
+  Nhat = 100;
+  range = [min(x),max(x)];
+  gridx = 0;
+  relative = 0;
+  midpr = 0;
   
-  ## Defaults if not provided
-  if nargin <= 3
-    d = 2;
+  ## this is a hack to allow passing varargin from another function...
+  ## is there a better way?
+  if length(varargin)
+    if iscell(varargin{1})
+      varargin = varargin{1};
+    endif
+  endif
+  ## parse the provided options
+  if ( length(varargin) )
+    for i = 1:length(varargin)
+      arg = varargin{i};
+      if ischar(arg)
+	switch arg
+	  case "gridx"
+	    gridx = 1;
+	  case "Nhat"
+	    gridx = 1;
+	    Nhat = varargin{i+1};
+	  case "range"
+	    gridx = 1;
+	    range = varargin{i+1};
+	  case "relative"
+	    relative = 1;
+	  case "midpointrule"
+	    midpr = 1;
+	  otherwise
+	    printf("Option '%s' is not implemented;\n", arg)
+	endswitch
+      endif
+    endfor
+  endif
+  if (gridx & midpr)
+    warning("midpointrule is not used if mapping to regularly spaced x")
+    midpr = 0;
+  endif
+  
+  N = length(x);
+  
+  ## construct xhat, M, D
+  if (gridx)
+    xmin = min(range(1),min(x));
+    xmax = max(range(2),max(x));
+    dx = (xmax-xmin)/(Nhat-1);
+    xhat = (xmin:dx:xmax)';
+    ## M is the mapping matrix yhat -> y
+    M = speye(Nhat);
+    idx = round((x - xmin) / dx) + 1;
+    M = M(idx,:);
+    ## note: this D does not include dx^(-d), but is scaled by delta below
+    D = diff(speye(Nhat),d);
+  else
+    Nhat = N;
+    xhat = x;
+    M = speye(N);
+    D = ddmat(x,d);
   endif
 
-  ## find the average dx in order to scale lambda
-  L = x(end) - x(1);
-  N = length(x);
-  dx = L/(N-1);
+  ## construct "weighting" matrices W and U
+  ## use relative differences
+  if (relative)
+    Yinv = spdiag(1./y);
+    W = Yinv^2;
+  else
+    W = speye(N);
+  endif
+  ## use midpoint rule integration (rather than simple sums)
+  if (midpr)
+    Bhat = spdiag(-ones(N-1,1),-1) + spdiag(ones(N-1,1),1);
+    Bhat(1,1) = -1;
+    Bhat(N,N) = 1;
+    B = 1/2*spdiag(Bhat*x);
+    if ( floor(d/2) == d/2 ) # test if d is even
+      dh = d/2;
+      Btilda = B(dh+1:N-dh,dh+1:N-dh);
+    else # d is odd
+      dh = ceil(d/2);
+      Btilda = B(dh:N-dh,dh:N-dh);
+    endif
+    W = W*B;
+    U = Btilda;
+  else
+    ## W = W*speye(Nhat);
+    U = speye(Nhat-d);
+  endif
   
-  ## form the matrices
-  ## D is the derivative matrix
-  D = ddmat(x,d);
-
-  ## B and Btilda are total integration matrices
-  Bhat = spdiag(-ones(N-1,1),-1) + spdiag(ones(N-1,1),1);
-  Bhat(1,1) = -1;
-  Bhat(N,N) = 1;
-  B = 1/2*spdiag(Bhat*x);
-  ##B = 1/dx*speye(N,N);  # force equal waiting, even for variable spaced x?
-  if ( floor(d/2) == d/2 ) # test if d is even
-    dh = d/2;
-    Btilda = B(dh+1:N-dh,dh+1:N-dh);
-  else # d is odd
-    dh = ceil(d/2);
-    Btilda = B(dh:N-dh,dh:N-dh);
-  endif
-
   ## Smoothing
-  delta = sqrt(trace(D'*D)); # a suitable invariant of D for scaling lambda
-  ##f = (B + lambda*D'*Btilda*D) \ B*y;
-  C = chol(B + lambda*delta^(-1)*D'*Btilda*D);
-  ys = C \ (C' \ B*y);
+  delta = trace(D'*D)/Nhat^(2+d);  # using "relative" affects this!
+  ##yhat = (M'*W*M + lambda*D'*U*D) \ M'*W*y;
+  C = chol(M'*W*M + lambda*delta^(-1)*D'*U*D);
+  yhat = C \ (C' \ M'*W*y);
   
   ## Computation of hat diagonal and cross-validation
-  if (nargout > 1)
+  if (nargout > 2)
     ## from AIChE J. (2006) 52, 325
-    ##H = (B + lambda*D'*Btilda*D) \ B;
-    H = C \ (C' \ B);
+    H = M * ( C \ (C' \ M'*W) );
     ## note: this is variance, squared of the standard error that Eilers uses
-    v = (y - ys)'*B*(y - ys) / (1 - trace(H)/N)^2;
+    v = (M*yhat - y)'*(M*yhat - y)/N / (1 - trace(H)/N)^2;
   endif
   
 endfunction
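
For orientation: the rewritten core solves (M'*W*M + lambda*delta^(-1)*D'*U*D)*yhat = M'*W*y by Cholesky factorization, as shown above. Below is a minimal usage sketch of the new interface; the data, lambda, and option values are illustrative only, and it assumes the package files from this commit are on the Octave load path.

  x = sort(rand(50,1)*2*pi);     # arbitrarily spaced sampling positions
  y = sin(x) + 0.1*randn(50,1);  # noisy data
  ## d = 2 and lambda = 1e-2 chosen by hand; map onto 200 equally spaced points
  [xhat, yhat, v] = rgdtsmcore (x, y, 2, 1e-2, "Nhat", 200, "range", [0,2*pi]);
  plot(x, y, 'o', xhat, yhat)    # v is the generalized cross-validation variance
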
inst/tkrgdatasmddwrap.m to inst/rgdtsmcorewrap.m
--- a/inst/tkrgdatasmddwrap.m
+++ b/inst/rgdtsmcorewrap.m
@@ -13,26 +13,53 @@
 ## along with this program; If not, see <http://www.gnu.org/licenses/>. 
 
 ## -*- texinfo -*-
-##@deftypefn {Function File} {@var{cve}|@var{stdevdif} =} tkrgdatasmddwrap (@var{log10lambda}, @var{x}, @var{y}, @var{o}, 'cve'|'stdev'[, @var{stdev}])
+## @deftypefn {Function File} {@var{cve}|@var{stdevdif} =} rgdtsmcorewrap (@var{log10lambda}, @var{x}, @var{y}, @var{d}, @var{mincell}, @var{options})
 ##
-##  Wrapper function for tkrgdatasmdd in order to minimize over lambda
-##  w.r.t. cross-validation error OR the squared difference between the
-##  standard deviation of data from the smooth data and the given
-##  standard deviation.
-##@end deftypefn
+##  Wrapper function for rgdtsmcore in order to minimize over
+##  @var{lambda} w.r.t. cross-validation error OR the squared difference
+##  between the standard deviation of (@var{y}-@var{yhat}) and the given
+##  standard deviation.  This function is called from regdatasmooth.
+## @end deftypefn
 
 
-
-function out = tkrgdatasmddwrap (log10lambda, x, y, d, mintype, stdev)
+function out = rgdtsmcorewrap (log10lambda, x, y, d, mincell, options)
 
   lambda = 10^(log10lambda);
   
-  if ( strcmp(mintype,"stdev") )
-    ys = tkrgdatasmdd(x, y, lambda, d);
-    stdevd = std(y-ys);
+  if ( length(mincell)==2 )
+    stdev = mincell{2};
+    [xhat, yhat] = rgdtsmcore (x, y, d, lambda, options);
+
+    N = length(x);
+    Nhat = length(xhat);
+
+    relative = 0;
+    for i = 1:length(options)
+      if strcmp(options{i},"relative")
+	relative = 1;
+      endif
+    endfor
+
+    if (Nhat!=N)
+      dx = (max(xhat)-min(xhat))/(Nhat-1);
+      idx = round((x - min(xhat)) / dx) + 1;
+      if relative
+	stdevd = std((y-yhat(idx))./y);
+      else
+	stdevd = std(y-yhat(idx));
+      endif
+    else
+      if relative
+	stdevd = std((y-yhat)./y);
+      else
+	stdevd = std(y-yhat);
+      endif
+    endif
+    
     out = (stdevd - stdev)^2;
+
   else
-    [ys, cve] = tkrgdatasmdd(x, y, lambda, d);
+    [xhat, yhat, cve] = rgdtsmcore (x, y, d, lambda, options);
     out = cve;
   endif
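
rgdtsmcorewrap is not meant to be called by hand; it exists so that fminunc can minimize the cross-validation error (or the stdev mismatch) over log10(lambda), as regdatasmooth does below. A sketch of that call pattern, with illustrative data and the same optimset tolerances used in the source:

  x = linspace(0, 2*pi, 60)';
  y = sin(x) + 0.1*randn(60,1);
  opt = optimset("TolFun", 1e-4, "MaxFunEvals", 20);
  ## minimize the generalized cross-validation error over log10(lambda)
  log10lambda = fminunc ("rgdtsmcorewrap", 0, opt, x, y, 2, {"cve"}, {});
  lambda = 10^log10lambda
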
 
inst/tkrgdatasmooth.m to inst/regdatasmooth.m
--- a/inst/tkrgdatasmooth.m
+++ b/inst/regdatasmooth.m
@@ -13,25 +13,96 @@
 ## along with this program; If not, see <http://www.gnu.org/licenses/>. 
 
 ## -*- texinfo -*-
-##@deftypefn {Function File} {[@var{ys}, @var{lambda}] =} tkrgdatasmooth (@var{x}, @var{y})
-##@deftypefnx {Function File} {[@var{ys}, @var{lambda}] =} tkrgdatasmooth (@var{x}, @var{y}, @var{o})
-##@deftypefnx {Function File} {[@var{ys}, @var{lambda}] =} tkrgdatasmooth (@var{x}, @var{y}, @var{o}, @var{option}, @var{value})
+##@deftypefn {Function File} {[@var{xhat}, @var{yhat}, @var{lambda}] =} regdatasmooth (@var{x}, @var{y}, [@var{options}])
 ##
-## Smooths the @var{y} values of 1D data by Tikhonov regularization.
-## The @var{x} values need not be equally spaced but they should be
-## ordered (and non-overlapping). The order of the smoothing derivative
-## @var{o}, can be provided (default is 2), and the option-value pair
-## should be either the regularizaiton parameter "lambda" or the
-## standard deviation "stdev" of the randomness in the data.  With no
-## option supplied, generalized cross-validation is used to determine
-## lambda.
-## Reference: Anal. Chem. (2003) 75, 3631.
-## @seealso{tkrgscatdatasmooth}
+## Smooths the @var{y} vs. @var{x} values of 1D data by Tikhonov
+## regularization. The smooth curve is returned as @var{xhat},
+## @var{yhat}. The regularization parameter @var{lambda} that was used
+## for the smoothing may also be returned.
+##
+## Currently supported input options are (multiple options are allowed):
+##
+##@table @code
+##@item "d", @var{value}
+## the smoothing derivative to use (default = 2)
+##@item "lambda", @var{value}
+## the regularization parameter to use
+##@item "stdev", @var{value}
+## the standard deviation of the measurement of @var{y}; an optimal
+## value for lambda will be determined by matching the provided
+## @var{value} with the standard deviation of @var{yhat}-@var{y};
+## if the option "relative" is also used, then a relative standard
+## deviation is inferred
+##@item "gcv"
+## use generalized cross-validation to determine the optimal value for
+## lambda; if neither the "lambda" nor the "stdev" option is given, this
+## option is implied
+##@item "lguess"
+## the initial value for lambda to use in the iterative minimization
+## algorithm to find the optimal value (default = 1)
+##@item "gridx"
+## use an equally spaced @var{xhat} vector for the smooth curve rather
+## than @var{x}; if "gridx" is NOT used, @var{x} must be ordered and
+## non-overlapping; this option is implied if either the "Nhat" or
+## "range" options are used
+##@item "Nhat", @var{value}
+## number of equally spaced smoothed points (default = 100)
+##@item "range", @var{value}
+## two element vector [min,max] giving the desired output range of
+## @var{xhat}; if not provided or if the provided range is less than
+## the range of the data, the default is the min and max of @var{x}
+##@item "relative"
+## use relative differences for the goodness-of-fit term
+##@item "midpointrule"
+## use the midpoint rule for the integration terms rather than a direct
+## sum; this option conflicts with the option "gridx"
+##@end table
+##
+## Please run the demos for example usage.
+##
+## References:  Anal. Chem. (2003) 75, 3631; AIChE J. (2006) 52, 325
+## @seealso{rgdtsmcore}
 ## @end deftypefn
 
+function [xhat, yhat, lambda] = regdatasmooth (x, y, varargin)
 
+  ## defaults
+  d = 2;
+  lambda = 0;
+  stdev = 0;
+  guess = 0;
+  
+  ## parse options for d, lambda, stdev, gcv, lguess;
+  ## remaining options (gridx, Nhat, range, relative, midpointrule)
+  ## will be sent directly to the core function
+  idx = [];
+  if ( nargin > 2)
+    for i = 1:nargin-2
+      arg = varargin{i};
+      if ischar(arg)
+	switch arg
+	  case "d"
+	    d = varargin{i+1};
+	    idx = [idx,i,i+1];
+	  case "lambda"
+	    lambda = varargin{i+1};
+	    idx = [idx,i,i+1];
+	  case "stdev"
+	    stdev = varargin{i+1};
+	    idx = [idx,i,i+1];
+	  case "gcv"
+	    idx = [idx,i];
+	  case "lguess"
+	    guess = log10(varargin{i+1});
+	    idx = [idx,i,i+1];
+	endswitch
+      endif
+    endfor
+  endif
+  varargin(idx) = [];
+  options = varargin;
 
-function [ys, lambda] = tkrgdatasmooth (x, y, d, option, value)
+  ## add a warning if more than one of the gcv, lambda, or stdev options is provided?
 
   if (length(x)!=length(y))
     error("x and y must be equal length vectors")
@@ -43,116 +114,67 @@
     y = y';
   endif
 
-  if (nargin < 3)
-    d = 2;
-  endif
-
-  guess = 0;
-  if (nargin > 3)
-    if ( strcmp(option,"lambda") )
-      ## if lambda provided, use it directly
-      lambda = value;
-    elseif ( strcmp(option,"stdev") )
-      ## if stdev is provided, scale it and use it
-      stdev = value;
-      opt = optimset("TolFun",1e-6,"MaxFunEvals",20);
-      log10lambda = fminunc ("tkrgdatasmddwrap", guess, opt, x, y, d, "stdev", stdev);
-      lambda = 10^log10lambda;
-    else
-      warning("option #s is not recognized; using cross-validation",option)
-    endif
+  if (lambda)
+    ## do nothing and use the provided lambda
+  elseif ( stdev )
+    opt = optimset("TolFun",1e-6,"MaxFunEvals",20);
+    log10lambda = fminunc ("rgdtsmcorewrap", guess, opt, x, y, d, {"stdev", stdev}, options);
+    lambda = 10^log10lambda;
   else
     ## perform cross-validation
     opt = optimset("TolFun",1e-4,"MaxFunEvals",20);
-    log10lambda = fminunc ("tkrgdatasmddwrap", guess, opt, x, y, d, "cve");
+    log10lambda = fminunc ("rgdtsmcorewrap", guess, opt, x, y, d, {"cve"}, options);
     lambda = 10^log10lambda;
   endif
   
-  ys = tkrgdatasmdd(x, y, lambda, d);
-
+  %%ys = tkrgdatasmdd(x, y, lambda, d);
+  [xhat, yhat] = rgdtsmcore (x, y, d, lambda, options);
+  
 endfunction
 
+## rewrite the demos!
 
 %!demo
 %! npts = 100;
-%! x = linspace(0,1,npts)';
-%! x = x + 1/npts*(rand(npts,1)-0.5);
-%! y = sin(10*x);
-%! stdev = 1e-1;
-%! y = y + stdev*randn(npts,1);
+%! x = linspace(0,2*pi,npts)';
+%! x = x + 2*pi/npts*(rand(npts,1)-0.5);
+%! y = sin(x);
+%! y = y + 1e-1*randn(npts,1);
 %! yp = ddmat(x,1)*y;
 %! y2p = ddmat(x,2)*y;
-%! d = 4;
-%! [ys, lambda] = tkrgdatasmooth (x,y,d);
+%! [xh, yh, lambda] = regdatasmooth (x, y, "d",4,"stdev",1e-1,"midpointrule");
 %! lambda
-%! ysp = ddmat(x,1)*ys;  
-%! ys2p = ddmat(x,2)*ys;
-%! figure(1);
-%! plot(x,y,'o',x,ys)
+%! yhp = ddmat(x,1)*yh;  
+%! yh2p = ddmat(x,2)*yh;
+%! clf
+%! subplot(221)
+%! plot(x,y,'o',x,yh,x,sin(x))
 %! title("y(x)")
-%! figure(2);
-%! plot(x(1:end-1),[yp,ysp])
-%! axis([min(x),max(x),min(ysp)-abs(min(ysp)),max(ysp)*2])
+%! legend("noisy","smoothed","sin(x)","location","northeast");
+%! subplot(222)
+%! plot(x(1:end-1),[yp,yhp,cos(x(1:end-1))])
+%! axis([min(x),max(x),min(yhp)-abs(min(yhp)),max(yhp)*2])
 %! title("y'(x)")
-%! figure(3)
-%! plot(x(2:end-1),[y2p,ys2p])
-%! axis([min(x),max(x),min(ys2p)-abs(min(ys2p)),max(ys2p)*2])
+%! legend("noisy","smoothed","cos(x)","location","southeast");
+%! subplot(223)
+%! plot(x(2:end-1),[y2p,yh2p,-sin(x(2:end-1))])
+%! axis([min(x),max(x),min(yh2p)-abs(min(yh2p)),max(yh2p)*2])
 %! title("y''(x)")
+%! legend("noisy","smoothed","-sin(x)","location","southeast");
 %! %--------------------------------------------------------
-%! % this demo used generalized cross-validation to determine lambda
+%! % smoothing of monotonic data, using "stdev" to determine the optimal lambda
 
 %!demo
-%! npts = 100;
-%! x = linspace(0,1,npts)';
-%! x = x + 1/npts*(rand(npts,1)-0.5);
-%! y = sin(10*x);
-%! stdev = 1e-1;
-%! y = y + stdev*randn(npts,1);
-%! yp = ddmat(x,1)*y;
-%! y2p = ddmat(x,2)*y;
-%! d = 4;
-%! [ys, lambda] = tkrgdatasmooth (x,y,d,"stdev",stdev);
+%! npts = 20;
+%! x = rand(npts,1)*2*pi;
+%! y = sin(x);
+%! y = y + 1e-1*randn(npts,1);
+%! [xh, yh, lambda] = regdatasmooth (x, y, "d", 3, "Nhat", 200, "range", [0,2*pi]);
 %! lambda
-%! ysp = ddmat(x,1)*ys;  
-%! ys2p = ddmat(x,2)*ys;
+%! clf
 %! figure(1);
-%! plot(x,y,'o',x,ys)
+%! plot(x,y,'o',xh,yh,xh,sin(xh))
 %! title("y(x)")
-%! figure(2);
-%! plot(x(1:end-1),[yp,ysp])
-%! axis([min(x),max(x),min(ysp)-abs(min(ysp)),max(ysp)*2])
-%! title("y'(x)")
-%! figure(3)
-%! plot(x(2:end-1),[y2p,ys2p])
-%! axis([min(x),max(x),min(ys2p)-abs(min(ys2p)),max(ys2p)*2])
-%! title("y''(x)")
+%! legend("noisy","smoothed","sin(x)","location","northeast");
 %! %--------------------------------------------------------
-%! % this demo used standard deviation to determine lambda
-
-%!demo
-%! npts = 100;
-%! x = linspace(0,1,npts)';
-%! x = x + 1/npts*(rand(npts,1)-0.5);
-%! y = sin(10*x);
-%! stdev = 1e-1;
-%! y = y + stdev*randn(npts,1);
-%! yp = ddmat(x,1)*y;
-%! y2p = ddmat(x,2)*y;
-%! d = 4;
-%! [ys, lambda] = tkrgdatasmooth (x,y,d,"lambda",100);
-%! lambda
-%! ysp = ddmat(x,1)*ys;  
-%! ys2p = ddmat(x,2)*ys;
-%! figure(1);
-%! plot(x,y,'o',x,ys)
-%! title("y(x)")
-%! figure(2);
-%! plot(x(1:end-1),[yp,ysp])
-%! axis([min(x),max(x),min(ysp)-abs(min(ysp)),max(ysp)*2])
-%! title("y'(x)")
-%! figure(3)
-%! plot(x(2:end-1),[y2p,ys2p])
-%! axis([min(x),max(x),min(ys2p)-abs(min(ys2p)),max(ys2p)*2])
-%! title("y''(x)")
-%! %--------------------------------------------------------
-%! % this demo used a user specified lambda that was too large
+%! % smoothing of scattered data, using "gcv" to determine the optimal lambda
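
The rewritten demos above exercise the "stdev" and default "gcv" paths for choosing lambda. For completeness, a hypothetical call that supplies the "lambda" option directly, so no minimization is run; the data and the lambda value are illustrative only:

  npts = 100;
  x = linspace(0, 2*pi, npts)';
  y = sin(x) + 1e-1*randn(npts,1);
  ## user-supplied regularization parameter; skips fminunc entirely
  [xh, yh, lambda] = regdatasmooth (x, y, "d", 2, "lambda", 1e-2);
  plot(x, y, 'o', xh, yh)
  title("y(x), user-supplied lambda")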