From: <cde...@us...> - 2010-08-05 16:23:41
|
Revision: 7498 http://octave.svn.sourceforge.net/octave/?rev=7498&view=rev Author: cdemills Date: 2010-08-05 16:23:32 +0000 (Thu, 05 Aug 2010) Log Message: ----------- Adding package dataframe Added Paths: ----------- trunk/octave-forge/extra/dataframe/ trunk/octave-forge/extra/dataframe/@dataframe/ trunk/octave-forge/extra/dataframe/@dataframe/cat.m trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m trunk/octave-forge/extra/dataframe/@dataframe/display.m trunk/octave-forge/extra/dataframe/@dataframe/end.m trunk/octave-forge/extra/dataframe/@dataframe/fold.m trunk/octave-forge/extra/dataframe/@dataframe/minus.m trunk/octave-forge/extra/dataframe/@dataframe/numel.m trunk/octave-forge/extra/dataframe/@dataframe/plus.m trunk/octave-forge/extra/dataframe/@dataframe/private/ trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt trunk/octave-forge/extra/dataframe/@dataframe/size.m trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m trunk/octave-forge/extra/dataframe/@dataframe/subsref.m trunk/octave-forge/extra/dataframe/@dataframe/summary.m trunk/octave-forge/extra/dataframe/data_test.csv trunk/octave-forge/extra/dataframe/dataframe trunk/octave-forge/extra/dataframe/octave_frame.zip Added: trunk/octave-forge/extra/dataframe/@dataframe/cat.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/cat.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/cat.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,116 @@ +function resu = cat(dim, A, varargin) + %# function resu = cat(dim, A, 
varargin) + %# This is the concatenation operator for a dataframe object. "Dim" + %# has the same meaning as ordinary cat. Next arguments may be + %# dataframe, vector/matrix, or two elements cells. First one is taken + %# as row/column name, second as data. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: cat.m 1025 2010-08-02 08:55:55Z dupuis $ + %# + + switch dim + case 1 + resu = A; + + for indi=1:length(varargin), + B = varargin{indi}; + if !isa(B, 'dataframe'), + if iscell(B) && 2 == length(B), + B = dataframe(B{2}, 'rownames', B{1}); + else + B = dataframe(B, 'colnames', inputname(2+indi)); + endif + endif + if resu._cnt(2) != B._cnt(2), + error('Different number of columns in dataframes'); + endif + %# do not duplicate empty names + if !isempty(resu._name{1}) || !isempty(B._name{1}), + if length(resu._name{1}) < resu._cnt(1), + resu._name{1}(end+1:resu._cnt(1), 1) = {''}; + endif + if length(B._name{1}) < B._cnt(1), + B._name{1}(end+1:B._cnt(1), 1) = {''}; + endif + resu._name{1} = vertcat(resu._name{1}(:), B._name{1}(:)); + resu._over{1} = [resu._over{1} B._over{1}]; + endif + resu._cnt(1) = resu._cnt(1) + B._cnt(1); + resu._ridx = [resu._ridx(:); B._ridx(:)]; + %# find data with same column names + indr = logical(ones(1, resu._cnt(2))); + indb = logical(ones(1, resu._cnt(2))); + indi = 1; + while indi <= resu._cnt(2), + indj = strmatch(resu._name{2}(indi), B. 
_name{2}); + if ~isempty(indj), + indj = indj(1); + if ~strcmp(resu._type{indi}, B._type{indj}), + error("Trying to mix columns of different types"); + endif + resu._data{indi} = [resu._data{indi}; B._data{indj}]; + indr(indi) = false; indb(indj) = false; + endif + indi = indi + 1; + endwhile + if any(indr) || any(indb) + error('Different number/names of columns in dataframe'); + endif + endfor + + case 2 + resu = A; + for indi=1:length(varargin), + B = varargin{indi}; + if !isa(B, 'dataframe'), + if iscell(B) && 2 == length(B), + B = dataframe(B{2}, 'colnames', B{1}); + else + B = dataframe(B, 'colnames', inputname(2+indi)); + endif + B._ridx = resu._ridx; %# make them compatibles + endif + if resu._cnt(1) != B._cnt(1), + error('Different number of rows in dataframes'); + endif + if any(resu._ridx(:) - B._ridx(:)) + error('dataframes row indexes not matched'); + endif + resu._name{2} = vertcat(resu._name{2}, B._name{2}); + resu._over{2} = [resu._over{2} B._over{2}]; + indj = resu._cnt(2) + 1; + for indi = 1:B._cnt(2), + resu._data{indj} = B._data{indi}; + resu._type{indj} = B._type{indi}; + indj = indj + 1; + endfor + resu._cnt(2) = resu._cnt(2) + B._cnt(2); + endfor + + otherwise + error('Incorrect call to cat'); + endswitch + + %# disp('End of cat'); keyboard +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/dataframe.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,269 @@ +function df = dataframe(x = [], varargin) + + %# -*- texinfo -*- + %# @deftypefn {Function File} @var{df} = dataframe(@var{x = []}, ...) + %# This is the default constructor for a dataframe object, which is + %# similar to R 'data.frame'. It's a way to group tabular data, then + %# accessing them either as matrix or by column name. 
+ %# Input argument x may be: @itemize + %# @item a dataframe => use @var{varargin} to pad it with supplemental + %# columns + %# @item a matrix => create column names from input name; each column + %# is used as an entry + %# @item a cell matrix => try to infer column names from the first row, + %# and row indexes and names from the first two columns; + %# @item a file name => import data into a dataframe; + %# @item a matrix of char => initialise colnames from them. + %# @item a two-element cell: use the first as the column to + %# append to, and the second as initialiser for the column(s) + %# @end itemize + %# If called with an empty value, or with the default argument, it + %# returns an empty dataframe which can be further populated by + %# assignment, cat, ... If called without any argument, it should + %# return a dataframe from the whole workspace (not implemented yet). + %# @*Variable input arguments are first parsed as pairs (options, values). + %# Recognised options are: @itemize + %# @item rownames : take the values as initialiser for row names + %# @item colnames : take the values as initialiser for column names + %# @item seeked : a field value which triggers start of processing. + %# Each preceding line is silently skipped. Default: none + %# @item unquot: a logical switch telling whether or not strings should + %# be unquoted before storage, default = true; + %# @end itemize + %# The remaining data are concatenated (right-appended) to the existing ones. + %# @end deftypefn + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. 
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: dataframe.m 1036 2010-08-03 16:24:01Z dupuis $ + %# + +if 0 == nargin + disp('FIXME -- should create a dataframe from the whole workspace') + return +endif + +if isempty(x), + %# default constructor: initialise the fields in the right order + df._cnt = [0 0]; + df._name = {cell(0, 1), cell(1, 0)}; %# rows - cols + df._over = cell(1, 2); + df._ridx = []; + df._data = cell(0, 0); + df._type = cell(0, 0); + df = class(df, 'dataframe'); + return +endif + +if isa(x, 'dataframe') + df = x; +elseif isa(x, 'struct'), + df = class(x, 'dataframe'); return +else + df = dataframe([]); %# get the right fields +endif + +seeked = []; unquot = true; + +if length(varargin) > 0, + indi = 1; + %# loop over possible arguments + while indi <= size(varargin, 2), + switch(varargin{indi}) + case 'rownames' + if !iscell(varargin{indi+1}), + df._name{1} = cellstr(varargin{indi+1}); + else + df._name{1} = varargin{indi+1}; + endif + df._over{1}(1, 1:length(df._name{1})) = false; + df._cnt(1) = size(df._name{1}, 1); + varargin(indi:indi+1) = []; + case 'colnames' + if !iscell(varargin{indi+1}), + df._name{2} = cellstr(varargin{indi+1}); + else + df._name{2} = varargin{indi+1}; + endif + dummy = cellfun(@(x) strsplit(x, '='), df._name{2}, ... 
+ "UniformOutput", false); + %# disp('line 89 '); keyboard -- leftover debugging breakpoint, disabled + df._over{2}(1, 1:length(df._name{2})) = false; + varargin(indi:indi+1) = []; + case 'seeked', + seeked = varargin{indi + 1}; + varargin(indi:indi+1) = []; + case 'unquot', + unquot = varargin{indi + 1}; + varargin(indi:indi+1) = []; + otherwise %# FIXME: just skip it for now + indi = indi + 1; + endswitch + endwhile +endif + +indi = 0; +while indi <= size(varargin, 2), + indi = indi + 1; + if ~isa(x, 'dataframe') + if isa(x, 'char') && size(x, 1) < 2, + %# read the data frame from a file + try + x = load(tilde_expand(x)); + catch + UTF8_BOM = char([0xEF 0xBB 0xBF]); + unwind_protect + fid = fopen(tilde_expand(x)); + dummy = char(fread(fid, 3)'); %# peek at the first three bytes only + if !strcmp(dummy, UTF8_BOM), + frewind(fid); %# no BOM: start again from the first byte + endif + in = fscanf(fid, "%c"); %# slurps everything + unwind_protect_cleanup + fclose(fid); + end_unwind_protect + lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines + content = cellfun(@(x) regexp(x, '(\b|'')[^,]+(''|\b)', 'match'), ... + lines, 'UniformOutput', false); %# extract fields + indl = 1; indj = 1; %# disp('line 151 '); keyboard + if ~isempty(seeked), + while indl <= length(lines), + dummy = content{indl}; + if strcmp(dummy{1}, seeked) + break; + endif + indl = indl + 1; + endwhile + else + dummy = content{indl}; + endif + x = cell(1+length(lines)-indl, size(dummy, 2)); + while indl <= length(lines), + dummy = content{indl}; + %# try to convert to float + the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ... 
+ 'UniformOutput', false); + for indk = 1: size(the_line, 2), + if isempty(the_line{indk}) || any(size(the_line{indk}) > 1), + %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; endif + if unquot, + try + x(indj, indk) = regexp(dummy{indk}, '[^''].*[^'']', 'match'){1}; + catch + %# if the previous test fails, try a simpler one + in = regexp(dummy{indk}, '[^'']+', 'match'); + if !isempty(in), + x(indj, indk) = in{1}; + else + x(indj, indk) = []; + endif + end_try_catch + else + x(indj, indk) = dummy{indk}; %# no conversion possible + endif + else + x(indj, indk) = the_line{indk}; + endif + endfor + indl = indl + 1; indj = indj + 1; + endwhile + clear UTF8_BOM fid in lines indl the_line content + end_try_catch + endif + + %# fallback, avoiding a recursive call + idx.type = '()'; + + if iscell(x), + if 2 == length(x), + %# use the intermediate value as destination column + [indc, ncol] = df_name2idx(df._name{2}, x{1}, df._cnt(2), "column"); + if ncol != 1, + error(["With two-elements cell, the first should resolve " ... + "to a single column"]); + endif + try + dummy = cellfun('class', x{2}(2, :), 'UniformOutput', false); + catch + dummy = cellfun('class', x{2}(1, :), 'UniformOutput', false); + end_try_catch + df = df_pad(df, 2, [length(dummy) indc], dummy); + x = x{2}; + indj = indc + (1:size(x, 2)); + else + indj = df._cnt(2)+(1:size(x, 2)); + endif + if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)), + [df._name{2}(indj, 1), df._over{2}(1, indj)] ... + = df_colnames(inputname(indi), indj); + endif + %# allow overwriting of column names + df._over{2}(1, indj) = true; + else + if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)), + [df._name{2}(indj, 1), df._over{2}(1, indj)] ... 
+ = df_colnames(inputname(indi), indj); + endif + endif + idx.subs = {'', indj}; + %# use direct assignement + df = subsasgn(df, idx, x); + elseif indi > 1, + error('Concatenating dataframes: use cat instead'); + endif + + try + %# loop over next variable argument + x = varargin{1, indi}; + catch + %# disp('line 197 ???'); + end_try_catch + +endwhile + +endfunction + +function [x, y] = df_colnames(base, num) + %# small auxiliary function to generate column names. This is required + %# here, as only the constructor can use inputname() + if any([index(base, "=")]), + %# takes the left part as base + x = strsplit(base, "="); + x = deblank(x{1}); y = false; + elseif any([index(base, '''')]), + %# base is most probably a filename + x = regexp(base, '[^''].*[^'']', 'match'){1}; y = true; + elseif any([index(base, "(") index(base, ":")]), + x = 'X'; y = true; %# this is a default value, may be changed + else + x = base; y = false; + endif + + if numel(num) > 1, + x = repmat(x, numel(num), 1); + x = cstrcat(x, strjust(num2str(num(:)), 'left')); + y = repmat(y, 1, numel(num)); + endif + + x = cellstr(x); + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/display.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/display.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/display.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,162 @@ +function resu = display(df) + + %# function resu = display(df) + %# Tries to produce a nicely formatted output of a dataframe. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. 
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: display.m 1027 2010-08-02 08:59:39Z dupuis $ + %# + +%# generate header name +if 2 == length(df._cnt), + head = sprintf("Dataframe with %d rows and %d columns", df._cnt); +else + head = sprintf("Dataframe with %d rows and %d columns (%d unfolded)", ... + df._cnt); +endif +if all(df._cnt > 0), %# stop for empty df + vspace = repmat(' ', df._cnt(1), 1); + indi = 1; %# the real, unfolded index + for indc = 1:df._cnt(2), %# loop over columns + %# emit column names and type + if 1 == size(df._data{indc}, 2), + dummy{1, 2+indi} = deblank(disp(df._name{2}{indc})); + dummy{2, 2+indi} = deblank(df._type{indc}); + else + %# append a dot and the third-dimension index to column name + tmp_str = [deblank(disp(df._name{2}{indc})) "."]; + tmp_str = arrayfun(@(x) horzcat(tmp_str, num2str(x)), ... 
+ (1:size(df._data{indc}, 2)), 'UniformOutput', false); + dummy{1, 2+indi} = tmp_str{1}; + dummy{2, 2+indi} = deblank(df._type{indc}); + indk = 1; while indk < size(df._data{indc}, 2), + dummy{1, 2+indi+indk} = tmp_str{1+indk}; + dummy{2, 2+indi+indk} = dummy{2, 2+indi}; + indk = indk + 1; + endwhile + endif + %# "print" each column + switch df._type{indc} + case {'char'} + indk = 1; while indk <= size(df._data{indc}, 2), + tmp_str = df._data{indc}(:, indk); %#get the whole column + indj = cellfun('isprint', tmp_str, 'UniformOutput', false); + indj = ~cellfun('all', indj); + for indr = 1:length(indj), + if indj(indr), + if isna(tmp_str{indr}), + tmp_str{indr} = "NA"; + else + tmp_str{indr} = undo_string_escapes(tmp_str{indr}); + endif + endif + endfor + %# keep the whole thing, and add a vertical space + dummy{3, 2+indi} = disp(char(tmp_str)); + dummy{3, 2+indi} = horzcat... + (vspace, char(regexp(dummy{3, 2+indi}, '.*', ... + 'match', 'dotexceptnewline'))); + indi = indi + 1; indk = indk + 1; + endwhile + otherwise + %# keep only one horizontal space per line + indk = 1; while indk <= size(df._data{indc}, 2), + dummy{3, 2+indi} = disp(df._data{indc}(:, indk)); + tmp_str = char(regexp(dummy{3, 2+indi}, ' \S.*', ... + 'match', 'dotexceptnewline')); + if size(tmp_str, 1) < df._cnt(1), + tmp_str = horzcat... + (vspace, char(regexp(dummy{3, 2+indi}, '\S.*', ... + 'match', 'dotexceptnewline'))); + endif + dummy{3, 2+indi} = tmp_str; + indi = indi + 1; indk = indk + 1; + endwhile + endswitch + endfor + + vspace = [' '; ' '; vspace]; + %# second line content + if 1 == size(df._ridx, 2), + dummy{2, 1} = ["_"; "Nr"]; + dummy{3, 1} = disp(df._ridx(:)); + indi = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline'); + resu = strjust(char(dummy{2, 1}, indi), 'right'); + else + resu = []; + for indi = 1:size(df._ridx, 2)-1, + dummy{2, 1} = [["_." 
num2str(indi)]; "Nr"]; + dummy{3, 1} = disp(df._ridx(:, indi)); + indj = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline'); + resu = horzcat(resu, strjust(char(dummy{2, 1}, indj), 'right'), vspace); + endfor + dummy{2, 1} = [["_." num2str(indi+1)]; "Nr"]; + dummy{3, 1} = disp(df._ridx(:, end)); + indj = regexp(dummy{3, 1}, '\b.*\b', 'match', 'dotexceptnewline'); + resu = horzcat(resu, strjust(char(dummy{2, 1}, indj), 'right')); + endif + %# emit row names + if isempty(df._name{1}), + dummy{2, 2} = []; dummy{3, 2} = []; + else + dummy{2, 2} = [" ";" "]; + dummy{3, 2} = df._name{1}; + endif + + if size(dummy, 2) > 1, + %# resu contains the ridx + + %# insert a vertical space + if !isempty(dummy{3, 2}), + indi = ~cellfun('isempty', dummy{3, 2}); + if any(indi), + resu = horzcat(resu, vspace); + resu = horzcat(resu, strjust(char(dummy{2, 2}, dummy{3,2}), 'right')); + endif + endif + + %# emit each colum + for indi = 1:max(df._cnt(2:end)), + try + %# avoid this column touching the previous one + if any(cellfun('size', dummy(1:2, 2+indi), 2) >= ... + size(dummy{3, 2+indi}, 2)), + resu = horzcat(resu, vspace); + endif + resu = horzcat(resu, strjust(char(dummy{:, 2+indi}), 'right')); + catch + tmp_str = sprintf("Emitting %d lines, expecting %d", ... 
+ size(dummy{3, 2+indi}, 1), df._cnt(1)); + error(tmp_str); + end_try_catch + endfor + else + resu = ''; + endif +else + resu = ''; +endif + +resu = char(head, resu); disp(resu) + + Added: trunk/octave-forge/extra/dataframe/@dataframe/end.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/end.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/end.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,36 @@ +function resu = end(df, k, n) + %# function resu = end(df, k, n) + %# This is the end operator for a dataframe object, returning the + %# maximum number of rows or columns + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: end.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + try + resu = df._cnt(k); + catch + error("incorrect call to end, index greater than number of dimensions"); + end_try_catch + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/fold.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/fold.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/fold.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,95 @@ +function resu = fold(df, dim, indr, indc) + + %# function resu = fold(df, dim, indr, indc) + %# The purpose is to fold a dataframe. The part in rows (1:indr-1) doesn't + %# move, then content starting at indr is moved into the second, + %# third, ... sheet. To be moved, there must be equality of rownames, + %# if any, and of fields contained in indc. + + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: fold.m 1016 2010-07-30 13:49:35Z dupuis $ + %# +switch dim + case 1 + [indr, nrow] = df_name2idx(df._name{1}, indr, df._cnt(1), 'row'); + [indc, ncol] = df_name2idx(df._name{2}, indc, df._cnt(2), 'column'); + + if indr(1) > 1, + slice_size = indr(1) - 1; + %# we can't use directly resu = df(1:slice_size, :, :) + S.type = '()'; + S.subs = { 1:slice_size, ':', ':', 'dataframe'}; + resu = subsref(df, S); + + %# how many columns for each slice + targets = cellfun('size', df._data, 2); + %# a test function to determine if the location is free + for indj = 1:df._cnt(2), + if any(indj == indc), + continue; + endif + switch df._type{indj} + case { 'char' } + testfunc{indj} = @(x, indr, indc) ... + !isna(x{indr, indc}); + otherwise + testfunc{indj} = @(x, indr, indc) ... + !isna(x(indr, indc)); + endswitch + endfor + + for indi = indr, + %# where does this line go ? + where = find(df._data{indc}(1:slice_size, 1) ... + == df._data{indc}(indi, 1)); + if !isempty(where), + %# transfering one line -- loop over columns + for indj = 1:df._cnt(2), + if any(indj == indc), + continue; + endif + + if testfunc{indj}(resu._data{indj}, where, targets(indj)) + %# add one more sheet + resu = df_pad(resu, 3, 1, indj); + targets(indj) = targets(indj) + 1; + endif + %# transfer field + resu._data{indj}(where, targets(indj)) = ... 
+ df._data{indj}(indi, 1); + endfor + %# update row index + resu._ridx(where, max(targets)) = df._ridx(indi); + else + disp('line 65: FIXME'); keyboard; + endif + endfor + + else + + disp('line 70: FIXME '); keyboard + endif + + +endswitch Added: trunk/octave-forge/extra/dataframe/@dataframe/minus.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/minus.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/minus.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,66 @@ +function resu = minus(A, B); + + %# function resu = minus(A, B) + %# Implements the '-' operator when at least one one argument is a dataframe. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: minus.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + [A, B] = df_basecomp(A, B); + + if isscalar(A) + %# B is a dataframe + resu = B; + for indi = 1:B._cnt(2), + resu._data{indi} = A-B._data{indi}; + endfor + return + elseif ismatrix(A), + resu = B; + for indi = 1:B._cnt(2), + resu._data{indi} = A(:, indi) - B._data{indi}; %# column indi of A pairs with column indi of B + endfor + return + endif + + if isscalar(B), + resu = A; + for indi = 1:A._cnt(2), + resu._data{indi} = A._data{indi} -B; + endfor + return + elseif ismatrix(B), + resu = A; + for indi = 1:A._cnt(2), + resu._data{indi} = A._data{indi} -B(:, indi); + endfor + return + endif + + resu = A; + for indi = 1:A._cnt(2), + resu._data{indi} = A._data{indi} -B._data{indi}; + endfor + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/numel.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/numel.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/numel.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,41 @@ +function n = numel(df, varargin) + %# function n = numel(df, varargin) + %# This is the numel operator for a dataframe object, returning the + %# product of the number of rows by the number of columns + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. 
If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: numel.m 981 2010-07-26 16:23:08Z dupuis $ + %# + +if 1 == nargin, + n = prod(df._cnt([1 end])); +else + error(print_usage()); +endif + +endfunction + +function usage = print_usage() + usage = strcat('Invalid call to numel. Correct usage is: ', ' ', ... + '-- Overloaded Function: numel (A)'); +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/plus.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/plus.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/plus.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,54 @@ +function resu = plus(A, B); + + %# function resu = plus(A, B) + %# Implements the '+' operator when at least one argument is a dataframe. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: plus.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + [A, B] = df_basecomp(A, B); + + if isscalar(A) + %# B is a dataframe + resu = B; + for indi = 1:B._cnt(2), + resu._data{indi} = A+B._data{indi}; + endfor + return + endif + + if isscalar(B), + resu = A; + for indi = 1:A._cnt(2), + resu._data{indi} = A._data{indi}+B; + endfor + return + endif + + resu = A; + for indi = 1:A._cnt(2), + resu._data{indi} = A._data{indi} + B._data{indi}; + endfor + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_basecomp.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,62 @@ +function [A, B] = df_basecomp(A, B); + + %# function [A, B] = df_basecomp(A, B) + %# Basic size verifcation for binary operations on dataframe. Returns + %# a scalar, a matrix, or a dataframe. Cell arrays are converted to df. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: df_basecomp.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + if isscalar(A) || isscalar(B) + return + endif + + if iscell(A), + A = dataframe(A); + elseif iscell(B), + B = dataframe(B); + endif + + if any(size(A) - size(B)), + error("Non compatible sizes"); + endif + if !isa(A, 'dataframe') || !isa(B, 'dataframe'), + return; %# don't go further with names/indexes comparisons + endif + + if any(A._ridx-B._ridx), + error("Non compatible indexes"); + endif + if !isempty(A._name{1}) && !isempty(B._name{1}) + if !any(strcmp(cellstr(A._name{1}), cellstr(B._name{1}))), + error("Incompatible row names"); + endif + endif + if !isempty(A._name{2}) && !isempty(B._name{2}) + if !any(strcmp(cellstr(A._name{2}), cellstr(B._name{2}))), + error("Incompatible column names"); + endif + endif + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_check_char_array.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,52 @@ +function resu = df_check_char_array(x, nelem, required) + + %# auxiliary function: pad a char array to some width + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. 
+ %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: df_check_char_array.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + if 2 == nargin, required = [nelem 1]; endif + + if nelem < required(1), + error("Too many elements to assign"); + endif + + %# a zero-length element is still considered as a space by char + if isempty(x), x = ' '; endif + + if size(x, 1) < max(required(1), nelem) + %# pad vertically + dummy = repmat(' ', nelem-size(x, 1), 1); + resu = char(x, dummy); + else + resu = x; + endif + + if size(resu, 2) < required(2), + %# pad horizontally + dummy = repmat(' ', nelem, required(2)-size(resu, 2)); + resu = horzcat(resu, dummy); + endif + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,121 @@ +function [idx, nelem, subs] = df_name2idx(names, subs, count, dimname); + + %# This is a helper routine to translate rownames or columnames into + %# real index. Input: names, a char array, and subs, a cell array as + %# produced by subsref and similar. This routine can also detect + %# ranges, two values separated by ':'. On output, subs is + %# 'sanitised' from names, and is either a vector, either a single ':' + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. 
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: df_name2idx.m 1037 2010-08-03 16:25:05Z dupuis $ + %# + + if isempty(subs), + %# not caring about rownames ? Avoid generating an error. + idx = []; nelem = 0; return + endif + + if isa(subs, 'char'), + orig_name = subs; + if 1 == size(subs, 1), + if strcmp(subs, ':') %# range operator + idx = 1:count; nelem = count; + return + endif + endif + subs = cellstr(subs); + else + if !isvector(subs), + error("Trying to access column as a matrix"); + endif + switch class(subs) + case {"cell" } + orig_name = char(subs); + otherwise + orig_name = num2str(subs); + endswitch + endif + + if isa(subs, 'cell'), + subs = subs(:); idx = []; + %# translate list of variables to list of indices + for indi= 1:size(subs, 1), + %# regexp doesn't like empty patterns + if isempty(subs{indi}), continue, endif + %# convert from standard pattern to regexp pattern + subs{indi} = regexprep(subs{indi}, '([^\.])\*', "$1.*"); + if 0 == index(subs{indi}, ':'), + for indj = 1:min(length(names), count), %# sanity check + if ~isempty(regexp(names{indj}, subs{indi})), + idx = [idx indj]; + endif + endfor + else + dummy = strsplit( subs{indi}, ':'); + ind_start = 1; + if !isempty(dummy{1}), + ind_start = sscanf(dummy{1}, "%d"); + if isempty(ind_start), + ind_start = 1; + for indj = 1:min(length(names), count), %# sanity check + if ~isempty(regexp(names{indj}, subs{indi})), + ind_start = indj; break; %# stop at the first match + endif + endfor + endif + endif + + if isempty(dummy{2}) || strcmp(dummy{2}, 
'end'), + ind_stop = count; + else + ind_stop = sscanf(dummy{2}, "%d"); + if isempty(ind_stop), + ind_stop = 1; + for indj = min(length(names), count):-1:1, %# sanity check + if ~isempty(regexp(names{indj}, subs{indi})), + ind_stop = indj; break; %# stop at the last match + endif + endfor + endif + endif + idx = [idx ind_start:ind_stop]; + endif + endfor + elseif isa(subs, 'logical'), + idx = 1:length(subs); + idx(~subs) = []; + else + idx = subs; + endif + + if isempty(idx), + keyboard + dummy = sprintf("Unknown %s name while searching for %s", ... + dimname, orig_name); + error(dummy); + endif + + subs = idx; + nelem = length(idx); + +endfunction Property changes on: trunk/octave-forge/extra/dataframe/@dataframe/private/df_name2idx.m ___________________________________________________________________ Added: svn:executable + * Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_pad.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,143 @@ +function df = df_pad(df, dim, n, coltype=[]) + %# function resu = df_pad(df, dim, n, coltype = []) + %# given a dataframe, insert n rows or columns, and adjust everything + %# accordingly. Coltype is a supplemental argument: + %# dim = 1 => not used + %# dim = 2 => type of the added column(s) + %# dim = 3 => index of columns receiving a new sheet (default: all) + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. 
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: df_pad.m 1028 2010-08-03 10:10:26Z dupuis $ + %# + + switch dim + case 1 + if !isempty(df._name{1}), + if length(df._name{1}) < df._cnt(1)+n, + %# generate a name for the new row(s) + df._name{1}(df._cnt(1)+(1:n), 1) = {'_'}; + df._over{1}(1, df._cnt(1)+(1:n), 1) = true; + endif + endif + %# complete row indexes + if isempty(df._ridx), + dummy = (1:n).'; + else + dummy = vertcat(df._ridx, repmat(NA, n, size(df._ridx, 2))); + endif + df._ridx = dummy; + %# pad every line + if !isempty(df._data), + for indi = 1:df._cnt(2), + m = size(df._data{indi}, 2); + switch df._type{indi} + case {'char'} + dummy = {}; dummy(1:n,1:m) = NA; + dummy = vertcat(df._data{indi}, dummy); + case { 'double' } + dummy = vertcat(df._data{indi}, repmat(NA, n, m)); + otherwise + dummy = cast(vertcat(df._data{indi}, repmat(NA, n, m)), ... + df._type{indi}); + endswitch + df._data{indi} = dummy; + endfor + endif + df._cnt(1) = df._cnt(1) + n; + + case 2 + %# create new columns + if isempty(coltype) + error("df_pad: dim equals 2, and coltype undefined"); + endif + if length(n) > 1, %#second value is an offset + indc = n(2); n = n(1); + if indc < df._cnt(2), + %# shift to the right + df._name{2}(n + (indc+1:end)) = df._name{2}(indc+1:end); + dummy = cstrcat(repmat('_', n, 1), ... 
+ strjust(num2str(indc + (1:n).'), 'left')); + df._name{2}(indc + (1:n)) = cellstr(dummy); + df._over{2}(indc + (1:n)) = true; + df._type(n+(indc+1:end)) = df._type(indc+1:end); + df._type(indc + (1:n)) = NA; + df._data(n + (indc+1:end)) = df._data(indc+1:end); + df._data(indc + (1:n)) = NA; + endif + else + indc = df._cnt(2); %# add new values after the last column + endif + if !isa(coltype, 'cell'), coltype = {coltype}; endif + for indi = (1:n), + switch coltype{indi} + case {'char'} + dummy = {repmat(NA, df._cnt(1), 1) }; + dummy(:, 1) = '_'; + case { 'double' } + dummy = repmat(NA, df._cnt(1), 1); + otherwise + dummy = cast(repmat(NA, df._cnt(1), 1), coltype{indi}); + endswitch + df._data{indc+indi} = dummy; + df._type{indc+indi} = coltype{indi}; + endfor + + if length(df._name{2}) < df._cnt(2)+n, + %# generate a name for the new column(s) + dummy = cstrcat(repmat('_', n, 1), ... + strjust(num2str(indc + (1:n).'), 'left')); + df._name{2}(indc + (1:n)) = cellstr(dummy) + df._over{2}(1, indc + (1:n)) = true; + endif + df._cnt(2) = df._cnt(2) + n; + + case 3 + if isempty(coltype), + coltype = 1:df._cnt(2); + endif + dummy = max(n+cellfun('size', df._data(coltype), 2)); + if size(df._ridx, 2) < dummy, + df._ridx(:, end+1:dummy) = NA; + endif + for indi = coltype, + switch df._type{indi} + case {'char'} + if isa(df._data{indi}, 'char'), + dummy = horzcat(df._data{indi}, {repmat(NA, df._cnt(1), n)}); + else + dummy = df._data{indi}; + endif + case { 'double' } + dummy = horzcat(df._data{indi}, repmat(NA, df._cnt(1), n)); + otherwise + dummy = cast(horzcat(df._data{indi}, repmat(NA, df._cnt(1), n)), ... 
+ df._type{indi}); + endswitch + df._data{indi} = dummy; + endfor + df._cnt(3) = sum(cellfun('size', df._data, 2)); + otherwise + error('Invalid dimension in df_pad'); + endswitch + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_strjust.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,36 @@ +function [a, b] = df_strjust(a, b) + + %# small auxiliary function: make two char arrays the same width + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: df_strjust.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + indi = size(a, 2) - size(b, 2); + if indi < 0 + a = horzcat(repmat(' ', size(a, 1), -indi), a); + elseif indi > 0, + b = horzcat(repmat(' ', size(b, 1), indi), b); + endif + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/private/df_strset.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,80 @@ +function [x, over] = df_strset(x, over, S, RHS, pad = ' ') + %# x = df_strset(x, over, S, RHS, pad = " ") + %# replaces the strings in cellstr x at indr by strings at y. Adapt + %# the width of x if required. Use x 'over' attribute to display a + %# message in case strings are overwritten. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. 
+ + %# + %# $Id: df_strset.m 1020 2010-07-30 15:21:23Z dupuis $ + %# + + %# adjust x size, if required + if isnull(RHS), + %# clearing + if isempty(S), + x = cell(0, 1); over = zeros(1, 0); + return + endif + dummy = S; dummy(1).subs(2:end) = []; + over = builtin('subsasgn', over, dummy, true); + else + if isempty(S), %# complete overwrite + if ischar(RHS), RHS = cellstr(RHS); endif + nrow = length(RHS); + if any(~over(nrow)), + warning('going to overwrite names'); + endif + x(1:nrow) = RHS; + over(1:nrow) = false; + if nrow < length(x), + x(nrow+1:end) = {pad}; + endif + return + else + dummy = S(1); dummy.subs(2:end) = []; % keep first dim only + if any(~(builtin('subsref', over, dummy))); + warning('going to overwrite names'); + endif + over = builtin('subsasgn', over, dummy, false); + endif + endif + + %# common part + if ischar(RHS) && length(S(1).subs) > 1, + %# partial accesses to a char array + dummy = char(x); + dummy = builtin('subsasgn', dummy, S, RHS); + if isempty(dummy), + x = cell(0, 1); over = zeros(1, 0); + return + endif + if size(dummy, 1) == length(x), + x = cellstr(dummy); + return + endif + %# partial clearing gone wrong ? retry + RHS = { RHS }; + endif + x = builtin('subsasgn', x, S, RHS); + +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/rationale.txt 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,113 @@ +1) Context + +I was recently performing I-V measurements of a MOS +(Metal-Oxide-Semiconductor) structure. A full set of measurements +contained a DC biasing voltage, an AC frequency, a small signal +capacitance and conductance. I had to change a few times the +measurement device configuration, so sometimes the sweeping occurred +first on frequency, then on voltage, sometimes in the reverse +order.
To make it short, I had to deal with many input files with +inconsistent column order. The code to identify this order quickly +became clumsy. + +The idea of a dataframe is to implement a mix between matrix and +cells. It's like a matrix, where each column contains elements of the +same type. Unlike a matrix, column types may differ. Also, +each column MUST have a name, and rows MAY have a name. Moreover, to +make it easy to interface with databases, each row must have a unique +identifier. The goal is to make it possible to use constructs like +y(:, ["Fr*"; "VB*"; "C";"G"]) +where y is the dataframe, and column selection is based on +regexp. This way, the translation between names and indexes uses all +the power of regexps. + +2) Implementation +A dataframe is a class containing the following members: +_cnt = [0 0] : row count, column count, ... nth dimension count +_name = cell(1, 2) : row names, column names, ... +_ridx = [] : a unique Id for each row +_data = cell(0, 0) : a container for each column +_type = cell(0, 0) : the type of each column + +The constructor can be used as +- no argument: convert the whole workspace to a dataframe (TBD) +- one null argument: return an empty dataframe +- one numeric or cell argument: transform it to a dataframe; tries to +infer column names from the name of the input argument. +- one char array with more than one line: uses it as rownames +- one single line char array: take it as the name of a file to read +data from. Expected format is csv; tries to be careful with +quoted/unquoted strings, also tries to remove trailing and leading +spaces from string entries. Do not try to cope with things such as +separator INSIDE quoted strings. + +- supplemental arguments may occur either as pairs (string, value), + or as vectors. In the first case, the string contains an optional + parameter whose value is contained in the next argument. In the + second case, the argument is right-appended to the dataframe.
Valid + optional parameters are + - rownames: a character array with the row names + - unquot: a logical to indicate if strings must be unquoted, default=true + - seeked: a string which must occur in the first row to start + considering values. Previous lines are skipped. + +3) Access (reading) +- like a single matrix: df(:, 3); df(3, :). If all the results are of +the same type, returns a matrix, otherwise a dataframe. This behavior +can be inhibited by having the last argument set to 'dataframe': + df(3, 3, 'dataframe') will return a one-by-one dataframe +- by column names: + df(:, ["Fr*"; "VB*"; "C";]) + will try to match a column name beginning with "F" followed by an + optional 'r', thus 'F', 'Fréquence' and 'Freqs'; then a column name + starting with "V" with an optional "B", like e.g. "VBias", then a + column name which is the exact string 'C'. +- by rownames: same principle +- either member selector may also be logical: + df(df.OK=='A', ['C';'G']) +- as a struct: either use one of the column names (df.C), or use + one of the allowed accessors for internal fields: "rownames", + "colnames", "rowcnt", "colcnt", "rowidx", "types". Direct access to + the members like y._type is allowed, but should be restricted to + class members and friends. "types" accepts both numeric and string + arguments, the latter being converted to column order based upon + column names. +- as a cell: TODO: define how to fill the cell array with all the + fields. + +4) Modifying +- as a matrix, using '()': use the same syntax as reading: + df(3, 'Fr*') = 200 + df(df.OK=='?', ['C'; 'G']) = NaN; + Note that removing elements may only occur on a full row or column + basis. Removing a single element is not allowed. +- as a struct: either access a column name, as + df.C = []; + or access the internal fields through entry points 'rownames' + and 'colnames', where care is taken to adapt the string widths in + order to make them compatible.
The entry point "types", with + numeric or string arguments, casts whole column(s) + to a new type: + df.types{[3 5]} = 'uint16' + df.types{"Freq"} = "uint32" +- as a cell: TBD + +5) other overloaded functions: display, size, numel, cat. The latter +has to be thoroughly tested. In particular, I've put the +restriction that horizontal cat requires that the row indexes are the +same for both elems. For vertical cat, how should we proceed ? Require +uniqueness of row indexes, and sorting ? Other ? + +6) to be done: +- the 'load' function is in fact contained inside the constructor; +maybe we should have a specific load function ? +- be able to load a dataframe from a URI specification +- write a simple 'save' function +- adding data to a dataframe: R doesn't seem to allow adding rows +to a data.frame, should we follow it ? +- add test cases +- implement a 'factor' class for categorised data +- make all functions below statistics/ dataframe compatible + +Pascal Dupuis +Louvain-la-Neuve, July First, 2010. Added: trunk/octave-forge/extra/dataframe/@dataframe/size.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/size.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/size.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,60 @@ +function [nrow, ncol] = size(df, varargin) + %# function resu = end(df, varargin) + %# This is size operator for a dataframe object. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version.
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: size.m 852 2010-07-22 10:47:55Z dupuis $ + %# + + switch nargin + case 1 + switch nargout + case {0 1} + nrow = df._cnt; + case {2} + nrow = df._cnt(1); ncol = df._cnt(2); + otherwise + error(print_usage()); + endswitch + case 2 + switch nargout + case {0 1} + nrow = df._cnt; + try + nrow = nrow(varargin{1}); + catch + error(print_usage()); + end_try_catch + otherwise + error(print_usage()); + endswitch + otherwise + error(print_usage()); + endswitch + +endfunction + +function usage = print_usage() + usage = strcat('Invalid call to size. Correct usage is: ', ' ', ... + '-- Overloaded Function: size (A, N)'); +endfunction Added: trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m =================================================================== --- trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m (rev 0) +++ trunk/octave-forge/extra/dataframe/@dataframe/subsasgn.m 2010-08-05 16:23:32 UTC (rev 7498) @@ -0,0 +1,500 @@ +function resu = subasgn(df, S, RHS) + %# function resu = subasgn(df, S, RHS) + %# This is the assignement operator for a dataframe object, taking + %# care of all the housekeeping of meta-info. + + %% Copyright (C) 2009-2010 Pascal Dupuis <Pas...@uc...> + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. 
+ %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 59 Temple Place - + %% Suite 330, Boston, MA 02111-1307, USA. + + %# + %# $Id: subsasgn.m 1035 2010-08-03 16:22:58Z dupuis $ + %# + + switch(S(1).type) + case '{}' + error('Invalid dataframe as cell assignement'); + case '.' + resu = df; + %# translate the external to internal name + switch S(1).subs + case "rownames" + if !isnull(RHS) && isempty(df._name{1}), + df._name{1}(1:df._cnt(1), 1) = {''}; + df._over{1}(1, 1:df._cnt(1)) = true; + endif + [resu._name{1}, resu._over{1}] = df_strset... + (df._name{1}, df._over{1}, S(2:end), RHS); + return + + case "colnames" + if isnull(RHS), error("Colnames can't be nulled"); endif + [resu._name{2}, resu._over{2}] = df_strset... + (df._name{2}, df._over{2}, S(2:end), RHS, '_'); + return + + case "types" + if isnull(RHS), error("Types can't be nulled"); endif + if 1 == length(S), + for indi = 1:df_cnt(2), + %# perform explicit cast on each column + resu._data{indi} = cast(resu._data{indi}, RHS); + resu._type{indi} = RHS; + endfor + else + if !strcmp(S(2).type, '{}'), + error("Invalid cell access"); + endif + if length(S) > 2 || length(S(2).subs) > 1, + error("Types can only be changed as a whole"); + endif + if !isnumeric(S(2).subs{1}), + [indj, ncol, S(2).subs{1}] = df_name2idx... + (df._name{2}, S(2).subs{1}, df._cnt(2), 'column'); + endif + for indi = 1:length(indj), + %# perform explicit cast on selected columns + resu._data{indj(indi)} = cast(resu._data{indj(indi)}, RHS); + resu._type{indj(indi)} = RHS; + endfor + endif + return + + otherwise + if !ischar(S(1).subs), + error("Congratulations. 
I didn't see how to ... [truncated message content] |