From: Muthu <gnumuthu@us...>  20070928 04:15:33

Update of /cvsroot/octave/octaveforge/main/infotheory/inst
In directory sc8prcvs3.sourceforge.net:/tmp/cvsserv16929/inst

Added Files:
    condentr_seq.m infoentr_seq.m infogain_seq.m mutualinfo_seq.m
Log Message:
sequence functions for Information theory

 NEW FILE: infogain_seq.m 

## Copyright (C) 2006 Joseph Wakeling <joseph.wakeling@...>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
##

## -*- texinfo -*-
## @deftypefn {Function File} {} infogain_seq (@var{seq_x}, @var{seq_y})
##
## Gives the information gain ratio (also known as the
## `uncertainty coefficient') of the sequence x
## conditional on y:
## I(X|Y) = I(X;Y)/H(X)
##
## Both arguments are passed unchanged to @code{mutualinfo_seq} and
## @code{infoentr_seq}; exactly two arguments are required.
##
## @example
## @group
## X=[1, 1, 2, 1, 1];
## Y=[2, 2, 1, 1, 2];
## infogain_seq(X,Y)
## @end group
## @end example
## @end deftypefn
## @seealso{infoentr_seq}
##
function IGR = infogain_seq(x,y)
  ## Exactly two sequences are required.  print_usage reports the real
  ## function name (the old message said "infogain", which does not exist).
  if nargin != 2
    print_usage();
  endif

  ## Ratio of mutual information to the entropy of x.
  ## NOTE(review): when H(x) is 0 (e.g. x is constant) this is a division
  ## by zero and the result is not a finite number -- confirm whether
  ## callers expect that.
  IGR = mutualinfo_seq(x,y) / infoentr_seq(x);

  ## Could also do
  ## IGR = 1 - condentr_seq(x,y)/infoentr_seq(x);
endfunction
%!
%!
%!

 NEW FILE: condentr_seq.m 

## Copyright (C) 2006 Joseph Wakeling <joseph.wakeling@...>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
##

## -*- texinfo -*-
## @deftypefn {Function File} {} condentr_seq (@var{seq_x}, @var{seq_y})
## Calculates information entropy of the sequence x
## conditional on the sequence y:
## H(X|Y) = H(X,Y) - H(Y)
##
## Both arguments are passed unchanged to @code{infoentr_seq};
## exactly two arguments are required.
## @example
## @group
## X=[1, 1, 2, 1, 1];
## Y=[2, 2, 1, 1, 2];
## condentr_seq(X,Y)
## @end group
## @end example
## @end deftypefn
## @seealso{infoentr_seq}
function Hcond = condentr_seq(x,y)
  ## Exactly two sequences are required.  print_usage reports the real
  ## function name (the old message said "condentr", which does not exist).
  if nargin != 2
    print_usage();
  endif

  ## Chain rule: H(X|Y) = H(X,Y) - H(Y).  The two-argument form of
  ## infoentr_seq yields the joint entropy, the one-argument form H(Y).
  Hcond = infoentr_seq(x,y) - infoentr_seq(y);
endfunction
%!
%!
%!

 NEW FILE: mutualinfo_seq.m 

## Copyright (C) 2006 Joseph Wakeling <joseph.wakeling@...>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
##

## -*- texinfo -*-
## @deftypefn {Function File} {} mutualinfo_seq (@var{seq_x}, @var{seq_y})
##
## Calculates mutual information of the sequences x and y:
## I(X;Y) = H(X) - H(X|Y) = H(Y) - H(Y|X) = I(Y;X)
##
## The arguments are passed unchanged to @code{infoentr_seq} and
## @code{condentr_seq}; exactly two arguments are required.
##
## @example
## @group
## X=[1, 1, 2, 1, 1];
## Y=[2, 2, 1, 1, 2];
## mutualinfo_seq(X,Y)
## @end group
## @end example
## @end deftypefn
## @seealso{infoentr_seq}
function I = mutualinfo_seq(x,y)
  ## Exactly two sequences are required.  print_usage reports the real
  ## function name (the old message said "mutualinfo", which does not exist).
  if nargin != 2
    print_usage();
  endif

  ## I(X;Y) = H(X) - H(X|Y)
  I = infoentr_seq(x) - condentr_seq(x,y);
endfunction

 NEW FILE: infoentr_seq.m 

## Copyright (C) 2006 Joseph Wakeling <joseph.wakeling@...>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
##

## -*- texinfo -*-
## @deftypefn {Function File} {} infoentr_seq (@var{seq_x}, @var{seq_y})
## If just one input, calculates Shannon Information Entropy
## of the sequence x:
## H(X) = @math{\sum_{x \in X} p(x) log2(1/p(x))}
##
## If two inputs, calculates joint entropy of the concurrent
## sequences x and y:
## H(X,Y) = @math{\sum_{x \in X, y \in Y} p(x,y) log2(1/p(x,y))}
##
## Each argument must be a row or column vector; with two arguments both
## must have the same dimensions.  Symbol combinations that never occur
## (probability zero) contribute nothing to the sum.
##
## @example
## @group
## X=[1, 1, 2, 1, 1];
## infoentr_seq(X)
## infoentr_seq([1,2,2,2,1,1,1,1,1],[1,2,2,2,2,2,1,1,1])
## @end group
## @end example
## @end deftypefn
## @seealso{infogain_seq}
function H = infoentr_seq(x,y)
  if (nargin < 1 || nargin > 2)
    print_usage();
  endif

  if (nargin == 2)
    if ((rows(x) ~= rows(y)) || (columns(x) ~= columns(y)))
      error("Arguments do not have same dimension.")
    endif
  endif

  ## We check that first argument is a vector, and
  ## if necessary convert to row vector.
  if (columns(x) == 1)
    x = x.';    # semicolon added: the original echoed x to the console
  elseif (rows(x) ~= 1)
    error("First argument is not a vector.");
  endif

  N = length(x);

  if (nargin == 1)
    ## Distinct symbols of x (unique replaces the obsolete create_set).
    X = unique(x);
    Nx = length(X);

    ## Count occurrences of every symbol.
    Pr = zeros(1, Nx);
    for i = 1:Nx
      Pr(i) = sum(x == X(i));
    endfor
  else
    ## Ensure that the second argument is a vector, and
    ## if necessary convert to row vector. Actually
    ## this is probably taken care of by the check on
    ## dimension agreement and the check on x above. :)
    if (columns(y) == 1)
      y = y.';  # semicolon added: the original echoed y to the console
    elseif (rows(y) ~= 1)
      error("Second argument is not a vector.");
    endif

    X = unique(x);
    Y = unique(y);
    Nx = length(X);
    Ny = length(Y);

    ## Count joint occurrences: Pr(i,j) is the number of positions where
    ## x equals X(i) and y equals Y(j) simultaneously (inner product of
    ## the two indicator row vectors).
    Pr = zeros(Nx, Ny);
    for i = 1:Nx
      for j = 1:Ny
        Pr(i,j) = (x == X(i)) * (y == Y(j)).';
      endfor
    endfor
  endif

  ## Sanity check kept from the original: the counts must add up to the
  ## sequence length.
  if (sum(Pr(:)) ~= N)
    fprintf(stdout,"Sum is wrong.\n");
  endif

  Pr = Pr / N;

  ## Shannon information content h = log2(1/p), summed over the cells that
  ## actually occur.  Restricting to Pr > 0 replaces the original
  ## "[GGx,GGy]=find(Pr==0); h(GGx,GGy)=0", which zeroed the whole
  ## cross product of the found rows and columns and therefore also
  ## discarded valid non-zero cells.
  p = Pr(Pr > 0);
  H = sum(p .* log2(1 ./ p));
endfunction
%!
%!
%!