[Octave-cvsupdate] SF.net SVN: octave:[7498] trunk/octave-forge/extra

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 7498
          http://octave.svn.sourceforge.net/octave/?rev=7498&view=rev
Author:   cdemills
Date:     2010-08-05 16:23:32 +0000 (Thu, 05 Aug 2010)

Log Message:
-----------
Adding package dataframe

Added Paths:
-----------
    trunk/octave-forge/extra/dataframe/
    trunk/octave-forge/extra/dataframe/@dataframe/
    trunk/octave-forge/extra/dataframe/@dataframe/cat.m
    trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m
    trunk/octave-forge/extra/dataframe/@dataframe/display.m
    trunk/octave-forge/extra/dataframe/@dataframe/end.m
    trunk/octave-forge/extra/dataframe/@dataframe/fold.m
    trunk/octave-forge/extra/dataframe/@dataframe/minus.m
    trunk/octave-forge/extra/dataframe/@dataframe/numel.m
    trunk/octave-forge/extra/dataframe/@dataframe/plus.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m
    trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m
    trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt
    trunk/octave-forge/extra/dataframe/@dataframe/size.m
    trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m
    trunk/octave-forge/extra/dataframe/@dataframe/subsref.m
    trunk/octave-forge/extra/dataframe/@dataframe/summary.m
    trunk/octave-forge/extra/dataframe/data_test.csv
    trunk/octave-forge/extra/dataframe/dataframe
    trunk/octave-forge/extra/dataframe/octave_frame.zip

Added: trunk/octave-forge/extra/dataframe/@dataframe/cat.m
===================================================================

--- trunk/octave-forge/extra/dataframe/@dataframe/cat.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/cat.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,116 @@
+function resu = cat(dim, A, varargin)
+  %# function resu = cat(dim, A, varargin)
+  %# Concatenation operator for dataframe objects. "dim" has the same
+  %# meaning as for the ordinary cat: 1 appends rows, 2 appends columns;
+  %# any other value is an error. Further arguments may be dataframes,
+  %# vectors/matrices, or two-element cells {row/column names, data}.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: cat.m 1025 2010-08-02 08:55:55Z dupuis $
+  %#
+
+  switch dim
+    case 1
+      resu = A;
+      %# append the rows of each further argument in turn
+      for indi=1:length(varargin),
+	B = varargin{indi};
+	if !isa(B, 'dataframe'),
+	  if iscell(B) && 2 == length(B),
+	    B = dataframe(B{2}, 'rownames', B{1});
+	  else
+	    B = dataframe(B, 'colnames', inputname(2+indi));
+	  endif
+	endif
+	if resu._cnt(2) != B._cnt(2),
+	  error('Different number of columns in dataframes');
+	endif
+	%# do not duplicate empty names
+	if !isempty(resu._name{1}) || !isempty(B._name{1}),
+	  if length(resu._name{1}) < resu._cnt(1),
+	    resu._name{1}(end+1:resu._cnt(1), 1) = {''};
+	  endif
+	  if length(B._name{1}) < B._cnt(1),
+	    B._name{1}(end+1:B._cnt(1), 1) = {''};
+	  endif
+	  resu._name{1} = vertcat(resu._name{1}(:),  B._name{1}(:));
+	  resu._over{1} = [resu._over{1} B._over{1}];
+	endif
+	resu._cnt(1) = resu._cnt(1) + B._cnt(1);
+	resu._ridx = [resu._ridx(:); B._ridx(:)];
+	%# find data with same column names
+	indr = logical(ones(1, resu._cnt(2)));
+	indb = logical(ones(1, resu._cnt(2)));
+	%# the names are compared exactly: a prefix match (plain strmatch)
+	%# would pair column 'x' with column 'x2'
+	for indc = 1:resu._cnt(2),
+	  indj = find(strcmp(resu._name{2}{indc}, B._name{2}));
+	  if ~isempty(indj),
+	    indj = indj(1);
+	    if ~strcmp(resu._type{indc}, B._type{indj}),
+	      error("Trying to mix columns of different types");
+	    endif
+	    resu._data{indc} = [resu._data{indc}; B._data{indj}];
+	    indr(indc) = false; indb(indj) = false;
+	  endif
+	endfor
+	%# every column on both sides must have found its counterpart
+	if any(indr) || any(indb)
+	  error('Different number/names of columns in dataframe');
+	endif
+      endfor
+
+    case 2
+      resu = A;
+      for indi=1:length(varargin),
+	B = varargin{indi};
+	if !isa(B, 'dataframe'),
+	  if iscell(B) && 2 == length(B),
+	    B = dataframe(B{2}, 'colnames', B{1});
+	  else
+	    B = dataframe(B, 'colnames', inputname(2+indi));
+	  endif
+	  B._ridx = resu._ridx; %# make them compatibles
+	endif
+	if resu._cnt(1) != B._cnt(1),
+	  error('Different number of rows in dataframes');
+	endif
+	if any(resu._ridx(:) - B._ridx(:))
+	  error('dataframes row indexes not matched');
+	endif
+	resu._name{2} = vertcat(resu._name{2}, B._name{2});
+	resu._over{2} = [resu._over{2} B._over{2}];
+	%# copy the columns of B after the existing ones
+	indj = resu._cnt(2) + 1;
+	for indc = 1:B._cnt(2),
+	  resu._data{indj} = B._data{indc};
+	  resu._type{indj} = B._type{indc};
+	  indj = indj + 1;
+	endfor
+	resu._cnt(2) = resu._cnt(2) + B._cnt(2);
+      endfor
+
+    otherwise
+      error('Incorrect call to cat');
+  endswitch
+
+  %#  disp('End of cat'); keyboard
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,269 @@
+function df = dataframe(x = [], varargin)
+  
+  %# -*- texinfo -*-
+  %#  @deftypefn {Function File} @var{df} = dataframe(@var{x = []}, ...)
+  %# This is the default constructor for a dataframe object, which is
+  %# similar to R 'data.frame'. It's a way to group tabular data, then
+  %# accessing them either as matrix or by column name.
+  %# Input argument x may be: @itemize
+  %# @item a dataframe => use @var{varargin} to pad it with supplemental
+  %# columns
+  %# @item a matrix => create column names from input name; each column
+  %# is used as an entry
+  %# @item a cell matrix => try to infer column names from the first row,
+  %#   and row indexes and names from the two first columns;
+  %# @item a file name => import data into a dataframe;
+  %# @item a matrix of char => initialise colnames from them.
+  %# @item a two-element cell: use the first as the column to
+  %# append to,  and the second as initialiser for the column(s)
+  %# @end itemize
+  %# If called with an empty value, or with the default argument, it
+  %# returns an empty dataframe which can be further populated by
+  %# assignment, cat, ... If called without any argument, it should
+  %# return a dataframe from the whole workspace. 
+  %# @*Variable input arguments are first parsed as pairs (options, values).
+  %# Recognised options are: @itemize
+  %# @item rownames : take the values as initialiser for row names
+  %# @item colnames : take the values as initialiser for column names
+  %# @item seeked : a field value which triggers start of processing.
+  %# Each preceding line is silently skipped. Default: none
+  %# @item unquot: a logical switch telling whether or not strings should
+  %# be unquoted before storage, default = true;
+  %# @end itemize
+  %# The remaining data are concatenated (right-appended) to the existing ones.
+  %# @end deftypefn
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+
+  %#
+  %# $Id: dataframe.m 1036 2010-08-03 16:24:01Z dupuis $
+  %#
+
+if 0 == nargin
+  disp('FIXME -- should create a dataframe from the whole workspace')
+  return
+endif
+
+if isempty(x),
+  %# default constructor: initialise the fields in the right order
+  df._cnt = [0 0];
+  df._name = {cell(0, 1), cell(1, 0)}; %# rows - cols 
+  df._over = cell(1, 2);
+  df._ridx = [];  
+  df._data = cell(0, 0); 
+  df._type = cell(0, 0);
+  df = class(df, 'dataframe');
+  return
+endif
+
+if isa(x, 'dataframe')
+  df = x;
+elseif isa(x, 'struct'),
+  df = class(x, 'dataframe'); return
+else
+  df = dataframe([]); %# get the right fields
+endif
+
+seeked = []; unquot = true; 
+
+if length(varargin) > 0,
+  indi = 1;
+  %# loop over possible arguments
+  while indi <= size(varargin, 2),
+    switch(varargin{indi})
+      case 'rownames'
+	if !iscell(varargin{indi+1}),
+	  df._name{1} = cellstr(varargin{indi+1});
+	else
+	  df._name{1} = varargin{indi+1};
+	endif
+	df._over{1}(1, 1:length(df._name{1})) = false;
+	df._cnt(1) = size(df._name{1}, 1);
+	varargin(indi:indi+1) = [];
+      case 'colnames'
+	if !iscell(varargin{indi+1}),
+	  df._name{2} = cellstr(varargin{indi+1});
+	else
+	  df._name{2} = varargin{indi+1};
+	endif
+	%# flag the supplied names as not overridable; the debugger stop
+	%# and the unused name splitting that used to sit here are gone
+	df._over{2}(1, 1:length(df._name{2})) = false;
+	varargin(indi:indi+1) = [];
+      case 'seeked',
+	seeked = varargin{indi + 1};
+	varargin(indi:indi+1) = [];
+      case 'unquot',
+	unquot = varargin{indi + 1};
+	varargin(indi:indi+1) = [];
+      otherwise %# FIXME: just skip it for now
+	indi = indi + 1;
+    endswitch
+  endwhile
+endif
+
+indi = 0; 
+while indi <= size(varargin, 2),
+  indi = indi + 1;
+  if ~isa(x, 'dataframe')
+    if isa(x, 'char') && size(x, 1) < 2,
+      %# read the data frame from a file
+      try
+	x = load(tilde_expand(x));
+      catch
+        UTF8_BOM = char([0xEF 0xBB 0xBF]);
+	unwind_protect
+	  fid = fopen(tilde_expand(x));
+	  dummy = fgetl(fid);
+	  if !strcmp(dummy, UTF8_BOM),
+	    frewind(fid);
+	  endif
+	  in = fscanf(fid, "%c"); %# slurps everything
+	unwind_protect_cleanup
+	  fclose(fid);
+	end_unwind_protect
+	lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines
+	content = cellfun(@(x) regexp(x, '(\b|'')[^,]+(''|\b)', 'match'), ...
+			  lines, 'UniformOutput', false); %# extract fields
+	indl = 1; indj = 1; %# disp('line 151 '); keyboard
+	if ~isempty(seeked),
+	  while indl <= length(lines),
+	    dummy = content{indl};
+	    if strcmp(dummy{1}, seeked)
+	      break;
+	    endif
+	    indl = indl + 1;
+	  endwhile
+	else
+	  dummy = content{indl};
+	endif
+	x = cell(1+length(lines)-indl, size(dummy, 2)); 
+	while indl <= length(lines),
+	  dummy = content{indl};
+	  %# try to convert to float
+	  the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ...
+			     'UniformOutput', false);
+	  for indk = 1: size(the_line, 2),
+	    if isempty(the_line{indk}) || any(size(the_line{indk}) > 1), 
+	      %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; endif
+	      if unquot,
+		try
+		  x(indj, indk) = regexp(dummy{indk}, '[^''].*[^'']', 'match'){1};
+		catch
+		  %# if the previous test fails, try a simpler one
+		  in = regexp(dummy{indk}, '[^'']+', 'match');
+		  if !isempty(in),
+		    x(indj, indk) = in{1};
+		  else
+		    x(indj, indk) = [];
+		  endif
+		end_try_catch
+	      else
+		x(indj, indk) = dummy{indk}; %# no conversion possible
+	      endif
+	    else
+	      x(indj, indk) = the_line{indk}; 
+	    endif
+	  endfor
+	  indl = indl + 1; indj = indj + 1;
+	endwhile
+	clear UTF8_BOM fid in lines indl the_line content
+      end_try_catch
+    endif
+    
+    %# fallback, avoiding a recursive call
+    idx.type = '()';
+
+    if iscell(x),
+      if 2 == length(x),
+	%# use the intermediate value as destination column
+	[indc, ncol] = df_name2idx(df._name{2}, x{1}, df._cnt(2), "column");
+	if ncol != 1,
+	  error(["With two-elements cell, the first should resolve " ...
+		 "to a single column"]);
+	endif
+	try
+	  dummy = cellfun('class', x{2}(2, :), 'UniformOutput', false);
+	catch
+	  dummy = cellfun('class', x{2}(1, :), 'UniformOutput', false);
+	end_try_catch
+	df = df_pad(df, 2, [length(dummy) indc], dummy);
+	x = x{2}; 
+	indj =  indc + (1:size(x, 2));
+      else
+	indj = df._cnt(2)+(1:size(x, 2));
+      endif
+      if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)),
+	[df._name{2}(indj, 1),  df._over{2}(1, indj)] ...
+	    = df_colnames(inputname(indi), indj);
+      endif
+      %# allow overwriting of column names
+      df._over{2}(1, indj) = true;
+    else
+      indj = df._cnt(2)+(1:size(x, 2)); %# was undefined here: append at the end
+      if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)),
+	[df._name{2}(indj, 1),  df._over{2}(1, indj)] ...
+	    = df_colnames(inputname(indi), indj);
+      endif
+    endif
+    idx.subs = {'', indj};
+    %# use direct assignment
+    df = subsasgn(df, idx, x);
+  elseif indi > 1,
+    error('Concatenating dataframes: use cat instead');
+  endif
+
+  try
+    %# loop over next variable argument
+    x = varargin{1, indi};   
+  catch
+    %#   disp('line 197 ???');
+  end_try_catch
+
+endwhile
+
+endfunction
+
+function [x, y] = df_colnames(base, num)
+  %# Auxiliary generator of column names, kept in the constructor file
+  %# because only the constructor can use inputname()
+  if any([index(base, "=")]),
+    %# "name = value" form: the text left of the first '=' is the name
+    parts = strsplit(base, "=");
+    y = false; x = deblank(parts{1});
+  elseif any([index(base, '''')]),
+    %# quoted text, most probably a file name: strip the quotes
+    found = regexp(base, '[^''].*[^'']', 'match');
+    x = found{1}; y = true;
+  elseif any([index(base, "(") index(base, ":")]),
+    %# indexing expression: use a default name, which may be changed
+    x = 'X'; y = true;
+  else
+    x = base; y = false;
+  endif
+  count = numel(num);
+  if count > 1,
+    %# one name per column: the base text followed by the column number
+    x = cstrcat(repmat(x, count, 1), strjust(num2str(num(:)), 'left'));
+    y = repmat(y, 1, count);
+  endif
+  x = cellstr(x);
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/display.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/display.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/display.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,162 @@
+function resu = display(df)
+  %# function resu = display(df)
+  %# Tries to produce a nicely formatted output of a dataframe: the char
+  %# matrix built here is printed with disp() and also returned in resu.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: display.m 1027 2010-08-02 08:59:39Z dupuis $
+  %#
+
+%# generate the header line; df._cnt holds either two or three counters
+if 2 == length(df._cnt),
+  head = sprintf("Dataframe with %d rows and %d columns", df._cnt);
+else
+  head = sprintf("Dataframe with %d rows and %d columns (%d unfolded)", ...
+		 df._cnt);
+endif
+if all(df._cnt > 0), %# stop for empty df
+  vspace = repmat(' ', df._cnt(1), 1);
+  indi = 1; %# the real, unfolded index
+  for indc = 1:df._cnt(2), %# loop over columns
+    %# emit column names and type
+    if 1 == size(df._data{indc}, 2),
+      dummy{1, 2+indi} = deblank(disp(df._name{2}{indc}));
+      dummy{2, 2+indi} = deblank(df._type{indc});
+    else
+      %# append a dot and the third-dimension index to column name
+      tmp_str = [deblank(disp(df._name{2}{indc})) "."];
+      tmp_str = arrayfun(@(x) horzcat(tmp_str, num2str(x)), ...
+			 (1:size(df._data{indc}, 2)), 'UniformOutput', false); 
+      dummy{1, 2+indi} = tmp_str{1};
+      dummy{2, 2+indi} = deblank(df._type{indc});
+      indk = 1; while indk < size(df._data{indc}, 2),
+	dummy{1, 2+indi+indk} = tmp_str{1+indk};
+	dummy{2, 2+indi+indk} = dummy{2, 2+indi};
+	indk = indk + 1;
+      endwhile
+    endif
+    %# "print" each column
+    switch df._type{indc}
+      case {'char'}
+	indk = 1; while indk <= size(df._data{indc}, 2),
+	  tmp_str = df._data{indc}(:, indk); %#get the whole column
+	  indj = cellfun('isprint', tmp_str, 'UniformOutput', false); 
+	  indj = ~cellfun('all', indj);
+	  for indr = 1:length(indj),
+	    if indj(indr),
+	      if isna(tmp_str{indr}),
+		tmp_str{indr} = "NA";
+	      else
+		tmp_str{indr} = undo_string_escapes(tmp_str{indr});
+	      endif
+	    endif
+	  endfor
+	  %# keep the whole thing, and add a vertical space
+	  dummy{3, 2+indi} = disp(char(tmp_str));
+	  dummy{3, 2+indi} = horzcat...
+	      (vspace, char(regexp(dummy{3, 2+indi}, '.*', ...
+				   'match', 'dotexceptnewline')));
+	  indi = indi + 1; indk = indk + 1;
+	endwhile
+      otherwise
+	%# keep only one horizontal space per line
+	indk = 1; while indk <= size(df._data{indc}, 2),
+	  dummy{3, 2+indi} = disp(df._data{indc}(:, indk));
+	  tmp_str = char(regexp(dummy{3, 2+indi}, ' \S.*', ...
+				'match', 'dotexceptnewline'));
+	  if size(tmp_str, 1) < df._cnt(1),
+	    tmp_str = horzcat...
+		(vspace, char(regexp(dummy{3, 2+indi}, '\S.*', ...
+				     'match', 'dotexceptnewline')));
+	  endif
+	  dummy{3, 2+indi} = tmp_str;
+	  indi = indi + 1; indk = indk + 1;
+	endwhile
+    endswitch
+  endfor
+
+  vspace = [' '; ' '; vspace];
+  %# second line content
+  if 1 == size(df._ridx, 2),
+    dummy{2, 1} = ["_"; "Nr"];
+    dummy{3, 1} = disp(df._ridx(:)); 
+    indi = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline');
+    resu = strjust(char(dummy{2, 1}, indi), 'right');
+  else
+    resu = [];
+    for indi = 1:size(df._ridx, 2)-1,
+      dummy{2, 1} = [["_." num2str(indi)]; "Nr"];
+      dummy{3, 1} = disp(df._ridx(:, indi)); 
+      indj = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline');
+      resu = horzcat(resu, strjust(char(dummy{2, 1}, indj), 'right'), vspace);
+    endfor
+    dummy{2, 1} = [["_." num2str(indi+1)]; "Nr"];
+    dummy{3, 1} = disp(df._ridx(:, end)); 
+    indj = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline');
+    resu = horzcat(resu, strjust(char(dummy{2, 1}, indj), 'right'));
+  endif
+  %# emit row names
+  if isempty(df._name{1}),
+    dummy{2, 2} = []; dummy{3, 2} = [];
+  else
+    dummy{2, 2} = [" ";" "];
+    dummy{3, 2} = df._name{1};
+  endif
+  
+  if size(dummy, 2) > 1,
+    %# resu contains the ridx
+   
+    %# insert a vertical space
+    if !isempty(dummy{3, 2}),
+      indi = ~cellfun('isempty', dummy{3, 2});
+      if any(indi),
+	resu = horzcat(resu, vspace);
+	resu = horzcat(resu, strjust(char(dummy{2, 2}, dummy{3,2}), 'right'));
+      endif
+    endif
+    
+    %# emit each column
+    for indi = 1:max(df._cnt(2:end)),
+      try
+	%# avoid this column touching the previous one
+	if any(cellfun('size', dummy(1:2, 2+indi), 2) >= ...
+	       size(dummy{3, 2+indi}, 2)),
+	  resu = horzcat(resu, vspace);
+	endif
+	resu = horzcat(resu, strjust(char(dummy{:, 2+indi}), 'right'));
+      catch
+	tmp_str = sprintf("Emitting %d lines, expecting %d", ...
+			  size(dummy{3, 2+indi}, 1), df._cnt(1));
+	error(tmp_str);
+      end_try_catch
+    endfor
+  else
+    resu = '';
+  endif
+else
+  resu = '';
+endif
+
+resu = char(head, resu); disp(resu)
+
+

Added: trunk/octave-forge/extra/dataframe/@dataframe/end.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/end.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/end.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,36 @@
+function resu = end(df, k, n)
+  %# function resu = end(df, k, n)
+  %# Overloaded "end" for dataframe objects: yields entry k of the
+  %# counters stored in df._cnt. The argument n is accepted but unused.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: end.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+  %# any failure while picking the counter is reported as a bad index
+  try
+    counts = df._cnt;
+    resu = counts(k);
+  catch
+    error("incorrect call to end, index greater than number of dimensions");
+  end_try_catch
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/fold.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/fold.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/fold.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,95 @@
+function resu = fold(df, dim, indr, indc)
+
+  %# function resu = fold(df, dim, indr, indc)
+  %# The purpose is to fold a dataframe. Part from (1:indr-1) doesn't
+  %# move, then content starting at indr is moved into the second,
+  %# third, ... sheet. To be moved, there must be equality of rownames,
+  %# if any, and of fields contained in indc.
+  %# Only dim == 1 is implemented; unsupported cases raise an error.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: fold.m 1016 2010-07-30 13:49:35Z dupuis $
+  %#
+switch dim
+  case 1
+    [indr, nrow] = df_name2idx(df._name{1}, indr, df._cnt(1), 'row');
+    [indc, ncol] = df_name2idx(df._name{2}, indc, df._cnt(2), 'column');
+    
+    if indr(1) > 1,
+      slice_size = indr(1) - 1;
+      %# we can't use directly resu = df(1:slice_size, :, :)
+      S.type = '()';
+      S.subs = { 1:slice_size, ':', ':', 'dataframe'};
+      resu = subsref(df, S);
+      
+      %# how many columns for each slice
+      targets = cellfun('size', df._data, 2); 
+      %# a test function to determine if the location is free
+      for indj = 1:df._cnt(2),
+	if any(indj == indc),
+	  continue;
+	endif
+	switch df._type{indj}
+	  case { 'char' }
+	    testfunc{indj} = @(x, indr, indc) ...
+		!isna(x{indr, indc});
+	  otherwise
+	    testfunc{indj} = @(x, indr, indc) ...
+		!isna(x(indr, indc));
+	endswitch
+      endfor
+
+      for indi = indr,
+	%# where does this line go ?
+	where = find(df._data{indc}(1:slice_size, 1) ...
+		     == df._data{indc}(indi, 1));
+	if !isempty(where),
+	  %# transfering one line -- loop over columns
+	  for indj = 1:df._cnt(2),
+	    if any(indj == indc),
+	      continue;
+	    endif
+	   
+	    if testfunc{indj}(resu._data{indj}, where, targets(indj))
+	      %# add one more sheet
+	      resu = df_pad(resu, 3, 1, indj);
+	      targets(indj) = targets(indj) + 1;
+	    endif
+	    %# transfer field
+	    resu._data{indj}(where, targets(indj)) = ...
+		df._data{indj}(indi, 1);
+	  endfor
+	  %# update row index
+	  resu._ridx(where, max(targets)) = df._ridx(indi);
+	else
+	  error("fold: row %d matches no row of the part kept in place", indi);
+	endif
+      endfor
+
+    else
+      %# nothing precedes indr(1): there is no first sheet to fold onto
+      error("fold: folding from the first row is not implemented");
+    endif
+  otherwise
+    error("fold: only folding along dimension 1 is implemented");
+endswitch

Added: trunk/octave-forge/extra/dataframe/@dataframe/minus.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/minus.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/minus.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,66 @@
+function resu = minus(A, B);
+  %# function resu = minus(A, B)
+  %# Implements the '-' operator when at least one argument is a dataframe.
+  %# The operands are first checked by df_basecomp; the result is a copy
+  %# of the dataframe operand whose columns hold the differences.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: minus.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+
+  [A, B] = df_basecomp(A, B);
+
+  if isscalar(A) 
+    %# B is a dataframe
+    resu = B; 
+    for indi = 1:B._cnt(2),
+      resu._data{indi} = A-B._data{indi};
+    endfor
+    return
+  elseif ismatrix(A),
+    resu = B; 
+    for indi = 1:B._cnt(2),
+      resu._data{indi} = A(:, indi) - B._data{indi}; %# column indi, not row indi
+    endfor
+    return
+  endif
+
+  if isscalar(B),
+    resu = A; 
+    for indi = 1:A._cnt(2),
+      resu._data{indi} = A._data{indi} -B;
+    endfor
+    return
+  elseif ismatrix(B),
+    resu = A; 
+    for indi = 1:A._cnt(2),
+      resu._data{indi} = A._data{indi} -B(:, indi);
+    endfor
+    return
+  endif
+
+  %# both operands are dataframes: subtract column by column
+  resu = A; 
+  for indi = 1:A._cnt(2),
+    resu._data{indi} = A._data{indi} -B._data{indi};
+  endfor
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/numel.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/numel.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/numel.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,41 @@
+function n = numel(df, varargin)
+  %# function n = numel(df, varargin)
+  %# This is the numel operator for a dataframe object, returning the
+  %# product of the first and of the last counter stored in df._cnt
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: numel.m 981 2010-07-26 16:23:08Z dupuis $
+  %#
+
+  %# only the one-argument form is supported
+  if nargin != 1,
+    error(print_usage());
+  endif
+  n = prod(df._cnt([1 end]));
+
+endfunction
+
+function usage = print_usage()  %# usage text; [] keeps the blanks strcat() strips
+  usage = ['Invalid call to numel.  Correct usage is: ', ' ', ...
+	   '-- Overloaded Function:  numel (A)'];
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/plus.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/plus.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/plus.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,70 @@
+function resu = plus(A, B);
+
+  %# function resu = plus(A, B)
+  %# Implements the '+' operator when at least one argument is a dataframe.
+  %# The operands are first checked by df_basecomp; the result is a copy
+  %# of the dataframe operand whose columns hold the sums.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: plus.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+
+  [A, B] = df_basecomp(A, B);
+
+  if isscalar(A) 
+    %# B is a dataframe
+    resu = B; 
+    for indi = 1:B._cnt(2),
+      resu._data{indi} = A+B._data{indi};
+    endfor
+    return
+  elseif ismatrix(A),
+    %# plain matrix on the left: pair its column indi with column indi of B
+    resu = B; 
+    for indi = 1:B._cnt(2),
+      resu._data{indi} = A(:, indi) + B._data{indi};
+    endfor
+    return
+  endif
+
+  if isscalar(B),
+    resu = A; 
+    for indi = 1:A._cnt(2),
+      resu._data{indi} = A._data{indi}+B;
+    endfor
+    return
+  elseif ismatrix(B),
+    %# plain matrix on the right: same pairing, column by column
+    resu = A; 
+    for indi = 1:A._cnt(2),
+      resu._data{indi} = A._data{indi} + B(:, indi);
+    endfor
+    return
+  endif
+
+  resu = A; 
+  for indi = 1:A._cnt(2),
+    resu._data{indi} = A._data{indi} + B._data{indi};
+  endfor
+
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,62 @@
+function [A, B] = df_basecomp(A, B);
+
+  %# function [A, B] = df_basecomp(A, B)
+  %# Basic size verification for binary operations on dataframe. Returns
+  %# a scalar, a matrix, or a dataframe. Cell arrays are converted to df.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_basecomp.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+
+  if isscalar(A)  || isscalar(B)
+    return %# nothing is verified when one operand is a scalar
+  endif
+
+  %# convert each cell operand on its own, so that both are converted
+  %# when both are cell arrays
+  if iscell(A), A = dataframe(A); endif
+  if iscell(B), B = dataframe(B); endif
+  
+  if any(size(A) - size(B)),
+    error("Non compatible sizes");
+  endif
+  if !isa(A, 'dataframe') || !isa(B, 'dataframe'),
+    return; %# don't go further with names/indexes comparisons
+  endif
+
+  if any(A._ridx-B._ridx),
+    error("Non compatible indexes");
+  endif
+  %# NOTE(review): the name checks below only fail when not a single
+  %# name matches; 'all' may have been intended -- confirm before changing
+  if !isempty(A._name{1}) && !isempty(B._name{1})
+    if !any(strcmp(cellstr(A._name{1}), cellstr(B._name{1}))),
+      error("Incompatible row names");
+    endif
+  endif
+  if !isempty(A._name{2}) && !isempty(B._name{2})
+    if !any(strcmp(cellstr(A._name{2}), cellstr(B._name{2}))),
+      error("Incompatible column names");
+    endif
+  endif
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,52 @@
+function resu = df_check_char_array(x, nelem, required)
+  %# auxiliary function: pad the char array x with spaces so that it has at
+  %# least nelem rows and required(2) columns; required defaults to [nelem 1]
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_check_char_array.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+  
+  if 2 == nargin, required = [nelem 1]; endif
+
+  if nelem < required(1),
+    error("Too many elements to assign");
+  endif
+
+  %# a zero-length element is still considered as a space by char
+  if isempty(x), x = ' '; endif 
+
+  if size(x, 1) < max(required(1), nelem)
+    %# pad vertically
+    dummy = repmat(' ', nelem-size(x, 1), 1);
+    resu = char(x, dummy);
+  else
+    resu = x;
+  endif
+      
+  if size(resu, 2) < required(2),
+    %# pad horizontally, matching the actual row count of resu
+    dummy = repmat(' ', size(resu, 1), required(2)-size(resu, 2));
+    resu = horzcat(resu, dummy);
+  endif
+
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,121 @@
+function [idx, nelem, subs] = df_name2idx(names, subs, count, dimname);
+
+  %# This is a helper routine to translate rownames or columnames into
+  %# real indexes. Input: names, a cellstr; subs, one subscript as found
+  %# in the cell array produced by subsref and similar; count, the number
+  %# of elements along that dimension; dimname, used in the error message.
+  %# A char or cellstr subs is matched as a regexp (a '*' not following a
+  %# '.' is read as '.*'); 'start:stop' gives a range whose bounds are
+  %# numbers, patterns, or empty. On output, subs is a vector or a single ':'
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_name2idx.m 1037 2010-08-03 16:25:05Z dupuis $
+  %#
+
+  if isempty(subs),
+    %# not caring about rownames ? Avoid generating an error.
+    idx = []; nelem = 0; return
+  endif
+
+  if isa(subs, 'char'),
+    orig_name = subs;
+    if 1 == size(subs, 1),
+      if strcmp(subs, ':') %# range operator
+        idx = 1:count; nelem = count;
+        return
+      endif
+    endif
+    subs = cellstr(subs);
+  else
+    if !isvector(subs),
+      error("Trying to access column as a matrix");
+    endif
+    switch class(subs)
+      case {"cell" }
+        orig_name = char(subs);
+      otherwise
+        orig_name = num2str(subs);
+    endswitch
+  endif
+ 
+  if isa(subs, 'cell'),
+    subs = subs(:); idx = [];
+    %# translate list of variables to list of indices
+    for indi= 1:size(subs, 1),
+      %# regexp doesn't like empty patterns
+      if isempty(subs{indi}), continue, endif 
+      %# convert from standard pattern to regexp pattern
+      subs{indi} = regexprep(subs{indi}, '([^\.])\*', "$1.*");
+      if 0 == index(subs{indi}, ':'),
+        for indj = 1:min(length(names), count), %# sanity check
+          if ~isempty(regexp(names{indj}, subs{indi})),
+            idx = [idx indj];
+          endif
+        endfor
+      else
+        dummy = strsplit( subs{indi}, ':');
+        ind_start = 1;
+        if !isempty(dummy{1}),
+          ind_start = sscanf(dummy{1}, "%d");
+          if isempty(ind_start),
+            ind_start = 1;
+            for indj = 1:min(length(names), count), %# sanity check
+              if ~isempty(regexp(names{indj}, dummy{1})), %# lower bound only
+                ind_start = indj; break; %# stop at the first match
+              endif
+            endfor
+          endif
+        endif
+        
+        if isempty(dummy{2}) || strcmp(dummy{2}, 'end'),
+          ind_stop = count;
+        else
+          ind_stop = sscanf(dummy{2}, "%d");
+          if isempty(ind_stop),
+            ind_stop = 1;
+            for indj = min(length(names), count):-1:1, %# sanity check
+              if ~isempty(regexp(names{indj}, dummy{2})), %# upper bound only
+                ind_stop = indj; break; %# stop at the last match
+              endif
+            endfor
+          endif
+        endif
+        idx = [idx ind_start:ind_stop];
+      endif
+    endfor
+  elseif isa(subs, 'logical'),
+    idx = 1:length(subs);
+    idx(~subs) = [];
+  else
+    idx = subs;
+  endif
+
+  if isempty(idx),
+    error("Unknown %s name while searching for %s", ...
+          dimname, orig_name);
+  endif
+
+  subs = idx;
+  nelem = length(idx);
+  
+endfunction


Property changes on: trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,143 @@
+function df = df_pad(df, dim, n, coltype=[])
+  %# function df = df_pad(df, dim, n, coltype=[])
+  %# given a dataframe, add n rows, columns or sheets (dim = 1, 2, 3) and
+  %# adjust everything accordingly. Coltype is a supplemental argument:
+  %# dim = 1 => not used
+  %# dim = 2 => type of the added column(s); n may be [count offset]
+  %# dim = 3 => index of columns receiving a new sheet (default: all)
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_pad.m 1028 2010-08-03 10:10:26Z dupuis $
+  %#
+
+  switch dim
+    case 1
+      if !isempty(df._name{1}),
+        if length(df._name{1}) < df._cnt(1)+n,
+          %# generate a name for the new row(s)
+          df._name{1}(df._cnt(1)+(1:n), 1) = {'_'};
+          df._over{1}(1, df._cnt(1)+(1:n), 1) = true;
+        endif
+      endif
+      %# complete row indexes
+      if isempty(df._ridx),
+        dummy = (1:n).';
+      else
+        dummy = vertcat(df._ridx, repmat(NA, n, size(df._ridx, 2))); 
+      endif
+      df._ridx = dummy;
+      %# pad every line
+      if !isempty(df._data),
+        for indi = 1:df._cnt(2),
+          m = size(df._data{indi}, 2);
+          switch df._type{indi}
+            case {'char'}
+              dummy = {}; dummy(1:n,1:m) = NA;
+              dummy = vertcat(df._data{indi}, dummy);
+            case { 'double' }
+              dummy = vertcat(df._data{indi}, repmat(NA, n, m));
+            otherwise
+              dummy = cast(vertcat(df._data{indi}, repmat(NA, n, m)), ...
+                           df._type{indi});
+          endswitch
+          df._data{indi} = dummy;
+        endfor
+      endif
+      df._cnt(1) = df._cnt(1) + n;
+
+    case 2
+      %# create new columns
+      if isempty(coltype)
+        error("df_pad: dim equals 2, and coltype undefined");
+      endif
+      if length(n) > 1, %#second value is an offset
+        indc =  n(2); n = n(1);
+        if indc < df._cnt(2),
+          %# shift to the right
+          df._name{2}(n + (indc+1:end)) =  df._name{2}(indc+1:end);
+          dummy = cstrcat(repmat('_', n, 1), ...
+                          strjust(num2str(indc + (1:n).'), 'left'));
+          df._name{2}(indc + (1:n)) = cellstr(dummy);
+          df._over{2}(indc + (1:n)) = true;
+          df._type(n+(indc+1:end)) = df._type(indc+1:end);
+          df._type(indc + (1:n)) = NA;
+          df._data(n + (indc+1:end)) = df._data(indc+1:end);
+          df._data(indc + (1:n)) = NA;
+        endif
+      else
+        indc = df._cnt(2); %# add new values after the last column
+      endif
+      if !isa(coltype, 'cell'), coltype = {coltype}; endif
+      for indi = (1:n),
+        switch coltype{indi}
+          case {'char'}
+            dummy = {repmat(NA, df._cnt(1), 1) }; 
+            dummy(:, 1) = '_';
+          case { 'double' }
+            dummy = repmat(NA, df._cnt(1), 1);
+          otherwise
+            dummy = cast(repmat(NA, df._cnt(1), 1), coltype{indi});
+        endswitch
+        df._data{indc+indi} = dummy;
+        df._type{indc+indi} = coltype{indi};
+      endfor
+      
+      if length(df._name{2}) < df._cnt(2)+n,
+        %# generate a name '_<index>' for the new column(s)
+        dummy = cstrcat(repmat('_', n, 1), ...
+                        strjust(num2str(indc + (1:n).'), 'left'));
+        df._name{2}(indc + (1:n)) = cellstr(dummy);
+        df._over{2}(1, indc + (1:n)) = true;
+      endif
+      df._cnt(2) = df._cnt(2) + n;
+      
+    case 3
+      if isempty(coltype),
+        coltype = 1:df._cnt(2);
+      endif
+      dummy = max(n+cellfun('size', df._data(coltype), 2));
+      if size(df._ridx, 2) < dummy,
+        df._ridx(:, end+1:dummy) = NA;
+      endif
+      for indi = coltype,
+        switch df._type{indi}
+          case {'char'}
+            if isa(df._data{indi}, 'char'),
+              dummy = horzcat(df._data{indi}, {repmat(NA, df._cnt(1), n)});
+            else
+              dummy = df._data{indi};
+            endif
+          case { 'double' }
+            dummy = horzcat(df._data{indi}, repmat(NA, df._cnt(1), n));
+          otherwise
+            dummy = cast(horzcat(df._data{indi}, repmat(NA, df._cnt(1), n)), ...
+                         df._type{indi});
+        endswitch
+        df._data{indi} = dummy;
+      endfor
+      df._cnt(3) = sum(cellfun('size', df._data, 2));
+    otherwise
+      error('Invalid dimension in df_pad');
+  endswitch
+
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,36 @@
+function [a, b] = df_strjust(a, b)
+  %# small helper: left-pad the narrower of two char arrays with spaces,
+  %# so that both end up with the same number of columns
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_strjust.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+
+  width_a = size(a, 2); width_b = size(b, 2);
+  if width_a > width_b,
+    b = horzcat(repmat(' ', size(b, 1), width_a - width_b), b);
+  elseif width_a < width_b
+    a = horzcat(repmat(' ', size(a, 1), width_b - width_a), a);
+  endif
+
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,80 @@
+function [x, over] = df_strset(x, over, S, RHS, pad = ' ')
+  %# [x, over] = df_strset(x, over, S, RHS, pad = " ")
+  %# replaces the strings of cellstr x selected by the index structure S
+  %# with RHS, adapting the size of x if required. 'over' flags the names
+  %# which may be replaced silently; a warning is displayed otherwise.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: df_strset.m 1020 2010-07-30 15:21:23Z dupuis $
+  %#
+
+  %# adjust x size, if required
+  if isnull(RHS),
+    %# clearing
+    if isempty(S),
+      x = cell(0, 1); over = zeros(1, 0);
+      return
+    endif
+    dummy = S; dummy(1).subs(2:end) = [];
+    over = builtin('subsasgn', over, dummy, true);
+  else
+    if isempty(S), %# complete overwrite
+      if ischar(RHS), RHS = cellstr(RHS); endif
+      nrow = length(RHS);
+      %# look at every replaced name; 'over' may be shorter than RHS
+      if any(~over(1:min(nrow, length(over)))),
+        warning('going to overwrite names');
+      endif
+      x(1:nrow) = RHS;
+      over(1:nrow) = false;
+      if nrow < length(x),
+        x(nrow+1:end) = {pad}; %# reset the names beyond RHS
+      endif
+      return
+    else
+      dummy = S(1); dummy.subs(2:end) = []; % keep first dim only
+      if any(~(builtin('subsref', over, dummy)));
+        warning('going to overwrite names');
+      endif
+      over = builtin('subsasgn', over, dummy, false);
+    endif
+  endif
+
+  %# common part
+  if ischar(RHS) && length(S(1).subs) > 1, 
+    %# partial accesses to a char array
+    dummy = char(x);
+    dummy = builtin('subsasgn', dummy, S, RHS);
+    if isempty(dummy),
+      x = cell(0, 1); over = zeros(1, 0);
+      return
+    endif
+    if size(dummy, 1) == length(x),
+      x = cellstr(dummy);
+      return
+    endif
+    %# partial clearing gone wrong ? retry
+    RHS = { RHS }; 
+  endif
+  x = builtin('subsasgn', x, S, RHS);
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,113 @@
+1) Context
+
+I was recently performing I-V measurements of a MOS
+(Metal-Oxide-Semiconductor) structure. A full set of measurements
+contained a DC biasing voltage, an AC frequency, a small signal
+capacitance and conductance. I had to change the measurement device
+configuration a few times, so sometimes the sweeping occurred
+first on frequency, then on voltage, sometimes in the reverse
+order. To make it short, I had to deal with many input files with
+inconsistent column order. The code to identify this order quickly
+became clumsy.
+
+The idea of a dataframe is to implement a mix between matrix and
+cells. It's like a matrix, where each column contains elements of the
+same type. Unlike a matrix, column types may be dissimilar. Also,
+each column MUST have a name, and rows MAY have a name. Moreover, to
+make it easy to interface with databases, each row must have a unique
+identifier. The goal is to make it possible to use constructs like
+y(:, ["Fr*"; "VB*"; "C";"G"])
+where y is the dataframe, and column selection is based on
+regexp. This way, the translation between names and indexes uses all
+the power of regexps.
+
+2) Implementation
+a dataframe is a class containing the following members:
+_cnt = [0 0] : row count, column count, ... nth dimension count
+_name = cell(1, 2) : row names, column names, ...
+_ridx = []  : a unique Id for each row
+_data = cell(0, 0) : a container for each column
+_type = cell(0, 0) : the type of each column
+
+The constructor can be used as
+- no argument: convert the whole workspace to a dataframe (TBD)
+- one null argument: return an empty dataframe
+- one numeric or cell argument: transform it to a dataframe; tries to
+infer column names from the name of the input argument.
+- one char array with more than one line: uses it as rownames
+- one single line char array: take it as the name of a file to read
+data from. Expected format is csv; tries to be careful with
+quoted/unquoted strings, and also tries to remove trailing and leading
+spaces from string entries. Does not try to cope with things such as
+separators INSIDE quoted strings.
+
+-supplemental arguments may occur either as pairs (string, value),
+ either as vectors. In the first case, the string contains an optional
+ parameter whose value is contained in the next argument. In the
+ second case,  the argument is right-appended to the dataframe. Valid
+ optional parameters are
+ - rownames: a character array with the row names
+ - unquot: a logical to indicate if strings must be unquoted, default=true
+ - seeked: a string which must occur in the first row to start
+ considering values. Previous lines are skipped.
+
+3) Access (reading)
+- like a single matrix: df(:, 3); df(3, :). If all the results are of
+the same type, returns a matrix, otherwise a dataframe. This behavior
+can be inhibited by having the last argument set to 'dataframe':
+  df(3, 3, 'dataframe') will return a one-by-one dataframe
+- by columnames:  
+  df(:, ["Fr*"; "VB*"; "C";])
+  will try to match a columname beginning with "F" followed by an
+  optional 'r', thus 'F', 'Fréquence' and 'Freqs'; then a columname
+  starting with "V" with an optional "B", e.g. "VBias"; then a
+  columname which is the exact string 'C'.
+- by rownames: same principle
+- either member selector may also be logical: 
+    df(df.OK=='A', ['C';'G']) 
+- as a struct: either use one of the column names (df.C), or use
+  one of the allowed accessors for internal fields: "rownames",
+  "colnames", "rowcnt", "colcnt", "rowidx", "types". Direct access to
+  the members like y._type is allowed, but should be restricted to
+  class members and friends. "types" accepts both numeric and string
+  arguments, the latter being converted to column order based upon
+  column names.
+- as a cell: TODO: define how to fill the cell array with all the
+  fields.
+
+4) Modifying
+- as a matrix, using '()': use the same syntax as reading:
+  df(3, 'Fr*') = 200
+  df(df.OK=='?', ['C'; 'G']) = NaN;
+  Note that removing elements may only occur on a full row or column
+  basis. Removing a single element is not allowed.
+- as a struct: either access a columname, as 
+  df.C = [];   
+  or access the internal fields through entry points 'rownames'
+  and 'colnames', where care is taken to adapt the strings width in
+  order to make them compatible. The entry point "types", with
+  numeric or string arguments, has the effect of casting whole column(s)
+  to a new type:
+  df.types{[3 5]} = 'uint16'
+  df.types{"Freq"} = "uint32"
+- as a cell: TBD
+
+5) other overloaded functions: display, size, numel, cat. The latter
+has to be thoroughly tested. In particular, I've put the
+restriction that horizontal cat requires that the row indexes are the
+same for both elems. For vertical cat, how should we proceed ? Require
+uniqueness of row indexes, and sorting ? Other ?
+
+6) to be done:
+- the 'load' function is in fact contained inside the constructor;
+maybe we should have a specific load function ?
+- be able to load a dataframe from a URI specification
+- write a simple 'save' function
+- adding data to a dataframe: R doesn't seem to allow adding rows
+to a data.frame, should we follow it ?
+- add test cases
+- implement a 'factor' class for categorised data
+- make all functions below statistics/ dataframe compatible
+
+Pascal Dupuis
+Louvain-la-Neuve, July First, 2010.

Added: trunk/octave-forge/extra/dataframe/@dataframe/size.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/size.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/size.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,60 @@
+function [nrow, ncol] = size(df, varargin)
+  %# function [nrow, ncol] = size(df, dim)
+  %# Size operator for a dataframe object.
+  %#   size(df)            returns the count vector stored in df._cnt
+  %#   [nr, nc] = size(df) returns its first two entries
+  %#   size(df, dim)       returns the entries of df._cnt selected by dim
+  %# Any other calling form raises the usage error.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: size.m 852 2010-07-22 10:47:55Z dupuis $
+  %#
+
+  %# dispatch on the number of inputs and of requested outputs
+  if 1 == nargin && nargout < 2,
+    %# size(df): the whole count vector
+    nrow = df._cnt;
+  elseif 1 == nargin && 2 == nargout,
+    %# [nr, nc] = size(df): rows and columns separately
+    nrow = df._cnt(1); ncol = df._cnt(2);
+  elseif 2 == nargin && nargout < 2,
+    %# size(df, dim): pick the requested entries of the count vector
+    counts = df._cnt;
+    try
+      nrow = counts(varargin{1});
+    catch
+      %# dim cannot index the count vector
+      error(print_usage());
+    end_try_catch
+  else
+    %# any other combination of inputs and outputs is rejected
+    error(print_usage());
+  endif
+
+endfunction
+
+function usage = print_usage() %# usage text; [] keeps the blanks strcat strips
+  usage = ['Invalid call to size.  Correct usage is: ', ' ', ...
+           '-- Overloaded Function:  size (A, N)'];
+endfunction

Added: trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m
===================================================================
--- trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m	                        (rev 0)
+++ trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m	2010-08-05 16:23:32 UTC (rev 7498)
@@ -0,0 +1,500 @@
+function resu = subasgn(df, S, RHS)
+  %# function resu = subasgn(df, S, RHS)
+  %# This is the assignement operator for a dataframe object, taking
+  %# care of all the housekeeping of meta-info.
+
+  %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...>
+  %%
+  %% This file is part of Octave.
+  %%
+  %% Octave is free software; you can redistribute it and/or
+  %% modify it under the terms of the GNU General Public
+  %% License as published by the Free Software Foundation;
+  %% either version 2, or (at your option) any later version.
+  %%
+  %% Octave is distributed in the hope that it will be useful,
+  %% but WITHOUT ANY WARRANTY; without even the implied
+  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  %% PURPOSE.  See the GNU General Public License for more
+  %% details.
+  %%
+  %% You should have received a copy of the GNU General Public
+  %% License along with Octave; see the file COPYING.  If not,
+  %% write to the Free Software Foundation, 59 Temple Place -
+  %% Suite 330, Boston, MA 02111-1307, USA.
+  
+  %#
+  %# $Id: subsasgn.m 1035 2010-08-03 16:22:58Z dupuis $
+  %#
+
+  switch(S(1).type)
+    case '{}'
+      error('Invalid dataframe as cell assignement');
+    case '.'
+      resu = df;
+      %# translate the external to internal name
+      switch S(1).subs
+	case "rownames"
+	  if !isnull(RHS) && isempty(df._name{1}),
+	    df._name{1}(1:df._cnt(1), 1) = {''};
+	    df._over{1}(1, 1:df._cnt(1)) = true;
+	  endif
+	  [resu._name{1}, resu._over{1}] = df_strset...
+	      (df._name{1}, df._over{1}, S(2:end), RHS);
+	  return
+	
+	case "colnames"
+	  if isnull(RHS), error("Colnames can't be nulled"); endif
+	  [resu._name{2}, resu._over{2}] = df_strset...
+	      (df._name{2}, df._over{2}, S(2:end), RHS, '_');
+	  return
+	  
+	case "types"
+	  if isnull(RHS), error("Types can't be nulled"); endif
+	  if 1 == length(S),
+	    for indi = 1:df_cnt(2),
+	      %# perform explicit cast on each column
+	      resu._data{indi} = cast(resu._data{indi}, RHS);
+	      resu._type{indi} = RHS;
+	    endfor
+	  else
+	    if !strcmp(S(2).type, '{}'),
+	      error("Invalid cell access");
+	    endif 
+	    if length(S) > 2 || length(S(2).subs) > 1,
+	      error("Types can only be changed as a whole");
+	    endif
+	    if !isnumeric(S(2).subs{1}),
+	      [indj, ncol, S(2).subs{1}] = df_name2idx...
+		  (df._name{2}, S(2).subs{1}, df._cnt(2), 'column');
+	    endif
+	    for indi = 1:length(indj),
+	      %# perform explicit cast on selected columns
+	      resu._data{indj(indi)} = cast(resu._data{indj(indi)}, RHS);
+	      resu._type{indj(indi)} = RHS;
+	    endfor 
+	  endif
+	  return
+	  
+	otherwise
+	  if !ischar(S(1).subs),
+	    error("Congratulations. I didn't see how to ...
 
[truncated message content]

[Octave-cvsupdate] SF.net SVN: octave:[7498] trunk/octave-forge/extra

A collection of packages providing extra functionality for GNU Octave

[Octave-cvsupdate] SF.net SVN: octave:[7498] trunk/octave-forge/extra