Update of /cvsroot/octave/octave-forge/main/econometrics/inst In directory sc8-pr-cvs3.sourceforge.net:/tmp/cvs-serv4513/main/econometrics/inst Modified Files: kernel_density_cvscore.m kernel_density_nodes.m __kernel_epanechnikov.m kernel_optimal_bandwidth.m kernel_regression_nodes.m Log Message: new versions that split out calculation of weights (now in oct file). Other minor improvements, and proper copyrights. Index: kernel_density_cvscore.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/econometrics/inst/kernel_density_cvscore.m,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- kernel_density_cvscore.m 24 Jan 2007 10:55:54 -0000 1.1 +++ kernel_density_cvscore.m 29 Jun 2007 11:15:19 -0000 1.2 @@ -1,5 +1,5 @@ -function cvscore = kernel_density_cvscore(bandwidth, data, depvar) - dens = kernel_density(data, data, exp(bandwidth), true); +function cvscore = kernel_density_cvscore(bandwidth, data, kernel) + dens = kernel_density(data, data, exp(bandwidth), true, 0, 0, chol(cov(data)), kernel); dens = dens + eps; # some kernels can assign zero density cvscore = -mean(log(dens)); endfunction Index: __kernel_epanechnikov.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/econometrics/inst/__kernel_epanechnikov.m,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- __kernel_epanechnikov.m 23 Mar 2007 16:14:30 -0000 1.2 +++ __kernel_epanechnikov.m 29 Jun 2007 11:15:19 -0000 1.3 @@ -32,5 +32,6 @@ # compute kernel z = sumsq(z, 2); z = ((1/2) / c * (K + 2) * (1 - z)) .* (z < 1); + z = z + eps; # avoid possible divide by zero in kernel regression endfunction Index: kernel_optimal_bandwidth.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/econometrics/inst/kernel_optimal_bandwidth.m,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- kernel_optimal_bandwidth.m 24 Jan 2007 10:55:54 -0000 1.1 +++ kernel_optimal_bandwidth.m 29 Jun 2007 11:15:19 -0000 1.2 @@ -1,4 +1,36 @@ -function bandwidth = kernel_optimal_bandwidth(data, depvar) +# Copyright (C) 2007 Michael Creel <mic...@ua...> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +# kernel_optimal_bandwidth: find optimal bandwith doing leave-one-out cross validation +# inputs: +# * data: data matrix +# * depvar: column vector or empty (""). +# If empty, do kernel density, orherwise, kernel regression +# * kernel (optional, string) the kernel function to use +# output: +# * h: the optimal bandwidth + +function bandwidth = kernel_optimal_bandwidth(data, depvar, kernel) + + if (nargin < 2) error("kernel_optimal_bandwidth: 3 arguments required"); endif + if (nargin < 3) kernel = "__kernel_epanechnikov"; endif + + do_density = false; + if isempty(depvar) do_density = true; endif; + # SA controls ub = 1; lb = -5; @@ -16,12 +48,12 @@ # bfgs controls bfgs_control = {100,0,0}; - if (nargin == 1) - bandwidth = samin("kernel_density_cvscore", {0, data}, sa_control); - bandwidth = bfgsmin("kernel_density_cvscore", {bandwidth, data}, bfgs_control); + if do_density + bandwidth = samin("kernel_density_cvscore", {0, data, kernel}, sa_control); + bandwidth = bfgsmin("kernel_density_cvscore", {bandwidth, data, kernel}, bfgs_control); else - bandwidth = samin("kernel_regression_cvscore", {0, data, depvar}, sa_control); - bandwidth = bfgsmin("kernel_regression_cvscore", {bandwidth, data, depvar}, bfgs_control); + bandwidth = samin("kernel_regression_cvscore", {0, data, depvar, kernel}, sa_control); + bandwidth = bfgsmin("kernel_regression_cvscore", {bandwidth, data, depvar, kernel}, bfgs_control); endif bandwidth = exp(bandwidth); endfunction Index: kernel_regression_nodes.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/econometrics/inst/kernel_regression_nodes.m,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- kernel_regression_nodes.m 23 Mar 2007 16:14:30 -0000 1.2 +++ kernel_regression_nodes.m 29 Jun 2007 11:15:19 -0000 1.3 @@ -1,4 +1,4 @@ -# Copyright (C) 2006 Michael Creel <mic...@ua...> +# Copyright (C) 2006, 2007 Michael Creel <mic...@ua...> # under the terms of the GNU General Public License. # # This program is free software; you can redistribute it and/or modify @@ -15,7 +15,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -# Kernel regression, parallel version: do calculations on nodes +# kernel_regression_nodes: for internal use by kernel_regression - does calculations on nodes function z = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug) @@ -40,19 +40,14 @@ y = data(:,1); data = data(:,2:columns(data)); + W = __kernel_weights(data, myeval, kernel); - # calculate distances - # note to self: loop is as fast as double kronecker with - # reshape. Also, simpler and safe in terms of memory. - z = zeros(nn,1); - for i = 1:nn - zz = data - kron(myeval(i,:), ones(n,1)); - zz = feval(kernel, zz); - if (do_cv) zz(i,:) = 0; endif - temp = sum(zz); - if (temp > 0) zz = zz / temp; endif - z(i,:) = zz'*y; - endfor + # drop own weight for CV + if (do_cv) W = W - diag(diag(W)); endif + + + W = W ./ (eps + repmat(sum(W,2),1,n)); # add eps to denominator to avoid crashes + z = W*y; if debug printf("z on node %d: \n", myrank); Index: kernel_density_nodes.m =================================================================== RCS file: /cvsroot/octave/octave-forge/main/econometrics/inst/kernel_density_nodes.m,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- kernel_density_nodes.m 23 Mar 2007 16:14:30 -0000 1.2 +++ kernel_density_nodes.m 29 Jun 2007 11:15:19 -0000 1.3 @@ -1,4 +1,4 @@ -# Copyright (C) 2006 Michael Creel <mic...@ua...> +# Copyright (C) 2006, 2007 Michael Creel <mic...@ua...> # under the terms of the GNU General Public License. # # This program is free software; you can redistribute it and/or modify @@ -15,7 +15,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -# Kernel density, parallel version: do calculations on nodes +# kernel_density_nodes: for internal use by kernel_density - does calculations on nodes function z = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug) @@ -38,18 +38,13 @@ nn = rows(myeval); n = rows(data); - # calculate distances - # note to self: loop is as fast as double kronecker with - # reshape. Also, simpler and safe in terms of memory. - z = zeros(nn,1); - for i = 1:nn - zz = data - kron(myeval(i,:), ones(n,1)); - zz = feval(kernel, zz); - z(i) = sum(zz); - if (do_cv) z(i) = z(i) - zz(i); endif - endfor - - if (do_cv) z = z / (n-1); else z = z/n; endif + W = __kernel_weights(data, myeval, kernel); + if (do_cv) + W = W - diag(diag(W)); + z = sum(W,2) / (n-1); + else + z = sum(W,2) / n; + endif if debug printf("z on node %d: \n", myrank); |