From: <cde...@us...> - 2010-08-25 09:39:24
|
Revision: 7569 http://octave.svn.sourceforge.net/octave/?rev=7569&view=rev Author: cdemills Date: 2010-08-25 09:39:17 +0000 (Wed, 25 Aug 2010) Log Message: ----------- - the regexp to split fields rejected numerical signs and decimal part delimiter Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-24 13:29:18 UTC (rev 7568) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-25 09:39:17 UTC (rev 7569) @@ -142,7 +142,9 @@ fclose(fid); end_unwind_protect lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines - content = cellfun(@(x) regexp(x, '(\b|'')[^,]+(''|\b)', 'match'), ... + %# a field either starts at a word boundary, either by + - . for + %# a numeric data, either by ' for a string. + content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]+(''|\b)', 'match'), \ lines, 'UniformOutput', false); %# extract fields indl = 1; indj = 1; %# disp('line 151 '); keyboard if ~isempty(seeked), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-08-30 08:13:23
|
Revision: 7605 http://octave.svn.sourceforge.net/octave/?rev=7605&view=rev Author: cdemills Date: 2010-08-30 08:13:17 +0000 (Mon, 30 Aug 2010) Log Message: ----------- - to append a column to a dataframe, use the syntax {{'Name', Content}} Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-30 01:09:22 UTC (rev 7604) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-30 08:13:17 UTC (rev 7605) @@ -212,6 +212,9 @@ x = x{2}; indj = indc + (1:size(x, 2)); else + if isa(x{1}, 'cell'), + x = x{1}; + endif indj = df._cnt(2)+(1:size(x, 2)); endif if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-08-30 14:07:42
|
Revision: 7607 http://octave.svn.sourceforge.net/octave/?rev=7607&view=rev Author: cdemills Date: 2010-08-30 14:07:36 +0000 (Mon, 30 Aug 2010) Log Message: ----------- - commented code Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-30 10:19:10 UTC (rev 7606) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-08-30 14:07:36 UTC (rev 7607) @@ -213,7 +213,7 @@ indj = indc + (1:size(x, 2)); else if isa(x{1}, 'cell'), - x = x{1}; + x = x{1}; %# remove one cell level endif indj = df._cnt(2)+(1:size(x, 2)); endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-09-30 10:11:37
|
Revision: 7782 http://octave.svn.sourceforge.net/octave/?rev=7782&view=rev Author: cdemills Date: 2010-09-30 10:11:30 +0000 (Thu, 30 Sep 2010) Log Message: ----------- - added a 'sep' argument for the elements separator - when splitting into lines, do a second pass to remove "\n" - use strsplit instead of a regexp, to permit empty fields - empty fields must not be stored in x - remove empty lines and column before further processing Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-29 20:11:16 UTC (rev 7781) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 10:11:30 UTC (rev 7782) @@ -29,6 +29,7 @@ %# Each preceeding line is silently skipped. Default: none %# @item unquot: a logical switch telling wheter or not strings should %# be unquoted before storage, default = true; + %# @item sep: the elements separator, default ',' %# @end itemize %# The remaining data are concatenanted (right-appended) to the existing ones. 
%# @end deftypefn @@ -82,7 +83,7 @@ df = dataframe([]); %# get the right fields endif -seeked = []; unquot = true; +seeked = []; unquot = true; sep = ','; if length(varargin) > 0, indi = 1; @@ -112,7 +113,7 @@ df._name{2} = cellstr(num2str(varargin{indi+1})); endswitch %# detect assignment - functions calls - ranges - dummy = cellfun('size', cellfun(@(x) strsplit(x, ':=('), df._name{2}, \ + dummy = cellfun('size', cellfun(@(x) strsplit(x, ":=("), df._name{2}, \ "UniformOutput", false), 2); if any(dummy > 1), warning('dataframe colnames taken literally and not interpreted'); @@ -125,6 +126,9 @@ case 'unquot', unquot = varargin{indi + 1}; varargin(indi:indi+1) = []; + case 'sep', + sep = varargin{indi + 1}; + varargin(indi:indi+1) = []; otherwise %# FIXME: just skip it for now indi = indi + 1; endswitch @@ -152,11 +156,18 @@ fclose(fid); end_unwind_protect lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines + lines = cellfun(@(x) regexp(x, '[^\n]*', 'match'), lines); + %#, \'UniformOutput', false); + %# remove \n %# a field either starts at a word boundary, either by + - . for %# a numeric data, either by ' for a string. 
- content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]+(''|\b)', 'match'), \ - lines, 'UniformOutput', false); %# extract fields - indl = 1; indj = 1; %# disp('line 151 '); keyboard + + %# content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]*(''|\b)', 'match'),\ + %# lines, 'UniformOutput', false); %# extract fields + + content = cellfun(@(x) strsplit(x, sep), lines, \ + 'UniformOutput', false); %# extract fields + indl = 1; indj = 1; %#disp('line 151 '); keyboard if ~isempty(seeked), while indl <= length(lines), dummy = content{indl}; @@ -169,8 +180,14 @@ dummy = content{indl}; endif x = cell(1+length(lines)-indl, size(dummy, 2)); + empty_lines = []; while indl <= length(lines), dummy = content{indl}; + if all(cellfun('size', dummy, 2) == 0), + empty_lines = [empty_lines indj]; + indl = indl + 1; indj = indj + 1; + continue; + endif %# try to convert to float the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ... 'UniformOutput', false); @@ -185,8 +202,8 @@ in = regexp(dummy{indk}, '[^'']+', 'match'); if !isempty(in), x(indj, indk) = in{1}; - else - x(indj, indk) = []; + %# else + %# x(indj, indk) = []; endif end_try_catch else @@ -198,10 +215,18 @@ endfor indl = indl + 1; indj = indj + 1; endwhile - clear UTF8_BOM fid in lines indl the_line content + if !isempty(empty_lines), + x(empty_lines, :) = []; + endif + %# detect empty columns + empty_lines = find(0 == sum(cellfun('size', x, 2))) + if !isempty(empty_lines), + x(:, empty_lines) = []; + endif + clear UTF8_BOM fid in lines indl the_line content empty_lines end_try_catch endif - + %# fallback, avoiding a recursive call idx.type = '()'; indj = df._cnt(2)+(1:size(x, 2)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-09-30 11:02:00
|
Revision: 7783 http://octave.svn.sourceforge.net/octave/?rev=7783&view=rev Author: cdemills Date: 2010-09-30 11:01:54 +0000 (Thu, 30 Sep 2010) Log Message: ----------- - re-added the stripping of fields initial spaces, which was de-activated by the switch from regexp -> strsplit Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 10:11:30 UTC (rev 7782) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 11:01:54 UTC (rev 7783) @@ -157,8 +157,9 @@ end_unwind_protect lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines lines = cellfun(@(x) regexp(x, '[^\n]*', 'match'), lines); + %# remove \n %#, \'UniformOutput', false); - %# remove \n + %# a field either starts at a word boundary, either by + - . for %# a numeric data, either by ' for a string. @@ -167,7 +168,7 @@ content = cellfun(@(x) strsplit(x, sep), lines, \ 'UniformOutput', false); %# extract fields - indl = 1; indj = 1; %#disp('line 151 '); keyboard + indl = 1; indj = 1; %# disp('line 151 '); keyboard if ~isempty(seeked), while indl <= length(lines), dummy = content{indl}; @@ -176,8 +177,8 @@ endif indl = indl + 1; endwhile - else - dummy = content{indl}; + %# else + %# dummy = content{indl}; endif x = cell(1+length(lines)-indl, size(dummy, 2)); empty_lines = []; @@ -188,6 +189,8 @@ indl = indl + 1; indj = indj + 1; continue; endif + %# remove leading space(s) + dummy = cellfun(@(x) regexp(x, '[^ ].*', 'match'), dummy); %# try to convert to float the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ... 
'UniformOutput', false); @@ -219,7 +222,7 @@ x(empty_lines, :) = []; endif %# detect empty columns - empty_lines = find(0 == sum(cellfun('size', x, 2))) + empty_lines = find(0 == sum(cellfun('size', x, 2))); if !isempty(empty_lines), x(:, empty_lines) = []; endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-27 08:17:54
|
Revision: 10073 http://octave.svn.sourceforge.net/octave/?rev=10073&view=rev Author: cdemills Date: 2012-03-27 08:17:47 +0000 (Tue, 27 Mar 2012) Log Message: ----------- - when the separator is ' ', use the form of strsplit which considers repetitions of the separator as one single value Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-26 19:46:13 UTC (rev 10072) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-27 08:17:47 UTC (rev 10073) @@ -210,9 +210,15 @@ %# content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]*(''|\b)', 'match'),\ %# lines, 'UniformOutput', false); %# extract fields - content = cellfun (@(x) strsplit (x, sep), lines, \ - 'UniformOutput', false); %# extract fields - indl = 1; indj = 1; %#disp('line 151 '); keyboard + + if (strfind (sep, ' ')) + content = cellfun (@(x) strsplit (x, sep, true), lines, \ + 'UniformOutput', false); %# extract fields + else + content = cellfun (@(x) strsplit (x, sep), lines, \ + 'UniformOutput', false); %# extract fields + endif + indl = 1; indj = 1; %# disp('line 151 '); keyboard if (~isempty (seeked)) while (indl <= length (lines)) dummy = content{indl}; @@ -239,7 +245,7 @@ endif if (size (dummy, 2) >= 2 && ... 
~isempty (regexp (dummy{2}, trigger, 'match'))) - %#was (strcmp (dummy{1}, trigger)) + %# was (strcmp (dummy{1}, trigger)) break; endif endwhile @@ -260,9 +266,16 @@ indl = indl + 1; indj = indj + 1; continue; endif + %# try to convert to float - the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ - 'UniformOutput', false); + if (1) + the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ + 'UniformOutput', false); + else + the_line = sscanf (dummy, "%f", locales); + the_line = cellfun (@(x) x{1}, the_line, 'UniformOutput', false); + endif + for indk = (1:size (the_line, 2)) if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1)) %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif @@ -284,11 +297,12 @@ x(indj, indk) = regexp (dummy{indk}, '[^ ].*', 'match'); endif else - if (~isempty (regexp (dummy{indk}, '[/:]'))) + if (~isempty (regexp (dummy{indk}, '[/:-]'))) %# try to convert to a date [timeval, nfields] = strptime( dummy{indk}, [char(37) 'd/' char(37) 'm/' char(37) 'Y ' char(37) 'T']); if (nfields > 0) %# at least a few fields are OK + keyboard timestr = strftime ([char(37) 'H:' char(37) 'M:' char(37) 'S'], timeval); %# try to extract the usec field, if any @@ -302,6 +316,9 @@ endif x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ... timeval.usec * 1e-6; + else + %# store it as is + x(indj, indk) = the_line{indk}; endif else x(indj, indk) = the_line{indk}; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-09-30 14:37:13
|
Revision: 7784 http://octave.svn.sourceforge.net/octave/?rev=7784&view=rev Author: cdemills Date: 2010-09-30 14:37:04 +0000 (Thu, 30 Sep 2010) Log Message: ----------- - tried to solve once (and for a long time) this stupid problems of EOL conventions Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 11:01:54 UTC (rev 7783) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 14:37:04 UTC (rev 7784) @@ -130,6 +130,7 @@ sep = varargin{indi + 1}; varargin(indi:indi+1) = []; otherwise %# FIXME: just skip it for now + disp(sprintf("Ignoring unkown argument %s", varargin{indi})); indi = indi + 1; endswitch endwhile @@ -155,17 +156,18 @@ unwind_protect_cleanup fclose(fid); end_unwind_protect - lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines - lines = cellfun(@(x) regexp(x, '[^\n]*', 'match'), lines); - %# remove \n - %#, \'UniformOutput', false); + eol = '\r\n|\n|\x0b|\f|\r|\x85'; + %# cut into lines -- include the EOL to have a one-to-one + %# matching between line numbers + lines = regexp(in, ['(^|' eol ')([^' eol ']+)'], 'match'); + %# remove the EOL character(s) + lines = cellfun(@(x) regexp(x, ['[^' eol ']+'], 'match'), lines); %# a field either starts at a word boundary, either by + - . for %# a numeric data, either by ' for a string. 
%# content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]*(''|\b)', 'match'),\ %# lines, 'UniformOutput', false); %# extract fields - content = cellfun(@(x) strsplit(x, sep), lines, \ 'UniformOutput', false); %# extract fields indl = 1; indj = 1; %# disp('line 151 '); keyboard @@ -233,7 +235,7 @@ %# fallback, avoiding a recursive call idx.type = '()'; indj = df._cnt(2)+(1:size(x, 2)); - + if iscell(x), if 2 == length(x), %# use the intermediate value as destination column This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-09-30 14:47:10
|
Revision: 7785 http://octave.svn.sourceforge.net/octave/?rev=7785&view=rev Author: cdemills Date: 2010-09-30 14:47:00 +0000 (Thu, 30 Sep 2010) Log Message: ----------- - only perform a switch() on varargin entries which are char Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 14:37:04 UTC (rev 7784) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 14:47:00 UTC (rev 7785) @@ -89,50 +89,54 @@ indi = 1; %# loop over possible arguments while indi <= size(varargin, 2), - switch(varargin{indi}) - case 'rownames' - switch class(varargin{indi+1}) - case {'cell'} - df._name{1} = varargin{indi+1}; - case {'char'} - df._name{1} = cellstr(varargin{indi+1}); - otherwise - df._name{1} = cellstr(num2str(varargin{indi+1})); - endswitch - df._over{1}(1, 1:length(df._name{1})) = false; - df._cnt(1) = size(df._name{1}, 1); - df._ridx = (1:df._cnt(1))'; - varargin(indi:indi+1) = []; - case 'colnames' - switch class(varargin{indi+1}) - case {'cell'} - df._name{2} = varargin{indi+1}; - case {'char'} - df._name{2} = cellstr(varargin{indi+1}); - otherwise - df._name{2} = cellstr(num2str(varargin{indi+1})); - endswitch - %# detect assignment - functions calls - ranges - dummy = cellfun('size', cellfun(@(x) strsplit(x, ":=("), df._name{2}, \ - "UniformOutput", false), 2); - if any(dummy > 1), - warning('dataframe colnames taken literally and not interpreted'); - endif - df._over{2}(1, 1:length(df._name{2})) = false; - varargin(indi:indi+1) = []; - case 'seeked', - seeked = varargin{indi + 1}; - varargin(indi:indi+1) = []; - case 'unquot', - unquot = varargin{indi + 1}; - varargin(indi:indi+1) = []; - case 'sep', - sep = varargin{indi + 1}; - varargin(indi:indi+1) = []; - otherwise %# FIXME: just skip 
it for now - disp(sprintf("Ignoring unkown argument %s", varargin{indi})); - indi = indi + 1; - endswitch + if isa(varargin{indi}, 'char'), + switch(varargin{indi}) + case 'rownames' + switch class(varargin{indi+1}) + case {'cell'} + df._name{1} = varargin{indi+1}; + case {'char'} + df._name{1} = cellstr(varargin{indi+1}); + otherwise + df._name{1} = cellstr(num2str(varargin{indi+1})); + endswitch + df._over{1}(1, 1:length(df._name{1})) = false; + df._cnt(1) = size(df._name{1}, 1); + df._ridx = (1:df._cnt(1))'; + varargin(indi:indi+1) = []; + case 'colnames' + switch class(varargin{indi+1}) + case {'cell'} + df._name{2} = varargin{indi+1}; + case {'char'} + df._name{2} = cellstr(varargin{indi+1}); + otherwise + df._name{2} = cellstr(num2str(varargin{indi+1})); + endswitch + %# detect assignment - functions calls - ranges + dummy = cellfun('size', cellfun(@(x) strsplit(x, ":=("), df._name{2}, \ + "UniformOutput", false), 2); + if any(dummy > 1), + warning('dataframe colnames taken literally and not interpreted'); + endif + df._over{2}(1, 1:length(df._name{2})) = false; + varargin(indi:indi+1) = []; + case 'seeked', + seeked = varargin{indi + 1}; + varargin(indi:indi+1) = []; + case 'unquot', + unquot = varargin{indi + 1}; + varargin(indi:indi+1) = []; + case 'sep', + sep = varargin{indi + 1}; + varargin(indi:indi+1) = []; + otherwise %# FIXME: just skip it for now + disp(sprintf("Ignoring unkown argument %s", varargin{indi})); + indi = indi + 1; + endswitch + else + indi = indi + 1; %# skip it + endif endwhile endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-10-01 07:09:10
|
Revision: 7787 http://octave.svn.sourceforge.net/octave/?rev=7787&view=rev Author: cdemills Date: 2010-10-01 07:09:03 +0000 (Fri, 01 Oct 2010) Log Message: ----------- - leading space(s) are now removed directly inside the regexp used to remove quotes Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 20:20:14 UTC (rev 7786) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-10-01 07:09:03 UTC (rev 7787) @@ -195,20 +195,19 @@ indl = indl + 1; indj = indj + 1; continue; endif - %# remove leading space(s) - dummy = cellfun(@(x) regexp(x, '[^ ].*', 'match'), dummy); %# try to convert to float - the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ... + the_line = cellfun(@(x) sscanf(x, "%f"), dummy, \ 'UniformOutput', false); for indk = 1: size(the_line, 2), if isempty(the_line{indk}) || any(size(the_line{indk}) > 1), - %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; endif + %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif if unquot, try - x(indj, indk) = regexp(dummy{indk}, '[^''].*[^'']', 'match'){1}; + %# remove quotes and leading space(s) + x(indj, indk) = regexp(dummy{indk}, '[^'' ].*[^'']', 'match'){1}; catch %# if the previous test fails, try a simpler one - in = regexp(dummy{indk}, '[^'']+', 'match'); + in = regexp(dummy{indk}, '[^'' ]+', 'match'); if !isempty(in), x(indj, indk) = in{1}; %# else @@ -216,7 +215,8 @@ endif end_try_catch else - x(indj, indk) = dummy{indk}; %# no conversion possible + %# no conversion possible, store and remove leading space(s) + x(indj, indk) = regexp(dummy{indk}, '[^ ].*', 'match'); endif else x(indj, indk) = the_line{indk}; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |
From: <cde...@us...> - 2010-10-26 13:56:19
|
Revision: 7875 http://octave.svn.sourceforge.net/octave/?rev=7875&view=rev Author: cdemills Date: 2010-10-26 13:56:12 +0000 (Tue, 26 Oct 2010) Log Message: ----------- Added <TAB> as default delimiter; skip lines whose first non-blank char is # Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-10-25 18:56:04 UTC (rev 7874) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-10-26 13:56:12 UTC (rev 7875) @@ -29,7 +29,7 @@ %# Each preceeding line is silently skipped. Default: none %# @item unquot: a logical switch telling wheter or not strings should %# be unquoted before storage, default = true; - %# @item sep: the elements separator, default ',' + %# @item sep: the elements separator, default '\t,' %# @end itemize %# The remaining data are concatenanted (right-appended) to the existing ones. %# @end deftypefn @@ -83,7 +83,7 @@ df = dataframe([]); %# get the right fields endif -seeked = []; unquot = true; sep = ','; +seeked = []; unquot = true; sep = "\t,"; if length(varargin) > 0, indi = 1; @@ -195,6 +195,12 @@ indl = indl + 1; indj = indj + 1; continue; endif + %# does it looks like a comment line ? + if regexp(dummy{1}, '^\s*#'), + empty_lines = [empty_lines indj]; + indl = indl + 1; indj = indj + 1; + continue; + endif %# try to convert to float the_line = cellfun(@(x) sscanf(x, "%f"), dummy, \ 'UniformOutput', false); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2010-10-26 15:02:14
|
Revision: 7877 http://octave.svn.sourceforge.net/octave/?rev=7877&view=rev Author: cdemills Date: 2010-10-26 15:02:08 +0000 (Tue, 26 Oct 2010) Log Message: ----------- Added the ability to specify an empty input and colnames, to pre-dimension the dataframe Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-10-26 14:38:32 UTC (rev 7876) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-10-26 15:02:08 UTC (rev 7877) @@ -63,7 +63,7 @@ return endif -if isempty(x), +if isempty(x) && 1 == nargin, %# default constructor: initialise the fields in the right order df._cnt = [0 0]; df._name = {cell(0, 1), cell(1, 0)}; %# rows - cols @@ -196,7 +196,7 @@ continue; endif %# does it looks like a comment line ? - if regexp(dummy{1}, '^\s*#'), + if regexp(dummy{1}, ['^\s*' char(35)]), empty_lines = [empty_lines indj]; indl = indl + 1; indj = indj + 1; continue; @@ -274,14 +274,20 @@ %# allow overwriting of column names df._over{2}(1, indj) = true; else - if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)), - [df._name{2}(indj, 1), df._over{2}(1, indj)] ... - = df_colnames(inputname(indi), indj); + if !isempty(indj), + if length(df._name{2}) < indj(1) || isempty(df._name{2}(indj)), + [df._name{2}(indj, 1), df._over{2}(1, indj)] ... 
+ = df_colnames(inputname(indi), indj); + endif endif endif - idx.subs = {'', indj}; - %# use direct assignement - df = subsasgn(df, idx, x); + if !isempty(indj), + idx.subs = {'', indj}; + %# use direct assignement + df = subsasgn(df, idx, x); + else + df._cnt(2) = length(df._name{2}); + endif elseif indi > 1, error('Concatenating dataframes: use cat instead'); endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2011-10-07 12:45:37
|
Revision: 8708 http://octave.svn.sourceforge.net/octave/?rev=8708&view=rev Author: cdemills Date: 2011-10-07 12:45:26 +0000 (Fri, 07 Oct 2011) Log Message: ----------- Add a new parameter, 'locales', to specify in which locales to interpret the data Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2011-10-07 08:04:32 UTC (rev 8707) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2011-10-07 12:45:26 UTC (rev 8708) @@ -90,6 +90,7 @@ %# default values seeked = []; trigger =[]; unquot = true; sep = "\t,"; cmt_lines = []; +locales = "C"; if (length(varargin) > 0) indi = 1; @@ -141,6 +142,9 @@ case 'sep', sep = varargin{indi + 1}; varargin(indi:indi+1) = []; + case 'locales' + locales = varargin{indi + 1}; + varargin(indi:indi+1) = []; otherwise %# FIXME: just skip it for now disp (sprintf ("Ignoring unkown argument %s", varargin{indi})); indi = indi + 1; @@ -251,7 +255,7 @@ continue; endif %# try to convert to float - the_line = cellfun (@(x) sscanf (x, "%f", ""), dummy, \ + the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ 'UniformOutput', false); for indk = (1:size (the_line, 2)) if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1)) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2011-12-13 18:05:23
|
Revision: 9376 http://octave.svn.sourceforge.net/octave/?rev=9376&view=rev Author: cdemills Date: 2011-12-13 18:05:12 +0000 (Tue, 13 Dec 2011) Log Message: ----------- Added the ability to detect a date; transform it to the number of seconds since Jan 1, 1970 Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2011-12-13 15:34:25 UTC (rev 9375) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2011-12-13 18:05:12 UTC (rev 9376) @@ -278,7 +278,28 @@ x(indj, indk) = regexp (dummy{indk}, '[^ ].*', 'match'); endif else - x(indj, indk) = the_line{indk}; + if (!isempty (regexp (dummy{indk}, '[/:]'))) + %# try to convert to a date + [timeval, nfields] = strptime( dummy{indk}, + [char(37) 'd/' char(37) 'm/' char(37) 'Y ' char(37) 'T']); + if (nfields > 0) %# at least a few fields are OK + timestr = strftime ([char(37) 'H:' char(37) 'M:' char(37) 'S'], + timeval); + %# try to extract the usec field, if any + idx = regexp (dummy{indk}, timestr, 'end'); + if (!isempty (idx)) + idx = idx + 1; + if (ispunct (dummy{indk}(idx))) + idx = idx + 1; + endif + timeval.usec = str2num(dummy{indk}(idx:end)); + endif + x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ... + timeval.usec * 1e-6; + endif + else + x(indj, indk) = the_line{indk}; + endif endif endfor indl = indl + 1; indj = indj + 1; @@ -292,6 +313,7 @@ x(:, empty_lines) = []; endif clear UTF8_BOM fid in lines indl the_line content empty_lines + clear timeval timestr nfields idx endif end_try_catch endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-07 08:05:19
|
Revision: 9759 http://octave.svn.sourceforge.net/octave/?rev=9759&view=rev Author: cdemills Date: 2012-03-07 08:05:08 +0000 (Wed, 07 Mar 2012) Log Message: ----------- Search for trigger in the first two fields, if any Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-07 08:04:20 UTC (rev 9758) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-07 08:05:08 UTC (rev 9759) @@ -233,9 +233,15 @@ if (all (cellfun ('size', dummy, 2) == 0)) continue; endif - if (strcmp (dummy{1}, trigger)) + if (size (dummy, 2) >= 1 && ... + ~isempty (regexp (dummy{1}, trigger, 'match'))) break; endif + if (size (dummy, 2) >= 2 && ... + ~isempty (regexp (dummy{2}, trigger, 'match'))) + %#was (strcmp (dummy{1}, trigger)) + break; + endif endwhile endif x = cell (1+length (lines)-indl, size(dummy, 2)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-27 08:21:56
|
Revision: 10074 http://octave.svn.sourceforge.net/octave/?rev=10074&view=rev Author: cdemills Date: 2012-03-27 08:21:45 +0000 (Tue, 27 Mar 2012) Log Message: ----------- - added a comment about a specific part of the code Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-27 08:17:47 UTC (rev 10073) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-27 08:21:45 UTC (rev 10074) @@ -272,6 +272,8 @@ the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ 'UniformOutput', false); else + %# this code require a patch to src/file-io.cc in main + %# Octave tree the_line = sscanf (dummy, "%f", locales); the_line = cellfun (@(x) x{1}, the_line, 'UniformOutput', false); endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-28 15:46:06
|
Revision: 10084 http://octave.svn.sourceforge.net/octave/?rev=10084&view=rev Author: cdemills Date: 2012-03-28 15:46:00 +0000 (Wed, 28 Mar 2012) Log Message: ----------- - new parameter, datefmt, and MUCH simpler date processing Modified Paths: -------------- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m =================================================================== --- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-28 14:39:57 UTC (rev 10083) +++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-28 15:46:00 UTC (rev 10084) @@ -27,6 +27,7 @@ %# @item colnames : take the values as initialiser for column names %# @item seeked : a (kept) field value which triggers start of processing. %# @item trigger : a (unkept) field value which triggers start of processing. + %# @item datefmt: date format, see datestr help %# Each preceeding line is silently skipped. 
Default: none %# @item unquot: a logical switch telling wheter or not strings should %# be unquoted before storage, default = true; @@ -90,7 +91,7 @@ %# default values seeked = []; trigger =[]; unquot = true; sep = "\t,"; cmt_lines = []; -locales = "C"; +locales = "C"; datefmt = ''; if (length (varargin) > 0) indi = 1; @@ -145,6 +146,9 @@ case 'locales' locales = varargin{indi + 1}; varargin(indi:indi+1) = []; + case 'datefmt' + datefmt = varargin{indi + 1}; + varargin(indi:indi+1) = []; otherwise %# FIXME: just skip it for now disp (sprintf ("Ignoring unkown argument %s", varargin{indi})); indi = indi + 1; @@ -160,10 +164,10 @@ endif indi = 0; -while (indi <= size(varargin, 2)) +while (indi <= size (varargin, 2)) indi = indi + 1; if (~isa (x, 'dataframe')) - if (isa(x, 'char') && size(x, 1) < 2) + if (isa (x, 'char') && size (x, 1) < 2) %# read the data frame from a file try dummy = tilde_expand (x); @@ -171,7 +175,7 @@ df._src{end+1, 1} = dummy; catch %# try our own method - UTF8_BOM = char([0xEF 0xBB 0xBF]); + UTF8_BOM = char ([0xEF 0xBB 0xBF]); unwind_protect dummy = tilde_expand (x); fid = fopen (dummy); @@ -250,7 +254,7 @@ endif endwhile endif - x = cell (1+length (lines)-indl, size(dummy, 2)); + x = cell (1+length (lines)-indl, size (dummy, 2)); empty_lines = []; cmt_lines = []; while (indl <= length (lines)) dummy = content{indl}; @@ -268,9 +272,9 @@ endif %# try to convert to float - if (1) - the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ - 'UniformOutput', false); + if (1) + the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \ + 'UniformOutput', false); else %# this code require a patch to src/file-io.cc in main %# Octave tree @@ -299,29 +303,22 @@ x(indj, indk) = regexp (dummy{indk}, '[^ ].*', 'match'); endif else - if (~isempty (regexp (dummy{indk}, '[/:-]'))) - %# try to convert to a date - [timeval, nfields] = strptime( dummy{indk}, - [char(37) 'd/' char(37) 'm/' char(37) 'Y ' char(37) 'T']); - if (nfields > 0) %# at least a few 
fields are OK - keyboard - timestr = strftime ([char(37) 'H:' char(37) 'M:' char(37) 'S'], - timeval); - %# try to extract the usec field, if any - idx = regexp (dummy{indk}, timestr, 'end'); - if (~isempty (idx)) - idx = idx + 1; - if (ispunct (dummy{indk}(idx))) - idx = idx + 1; - endif - timeval.usec = str2num(dummy{indk}(idx:end)); - endif + if (~isempty (regexp (dummy{indk}, '[/:-]')) && ... + ~isempty (datefmt)) + + try + datetime = datevec (dummy{indk}, datefmt); + timeval = struct ("usec", 0, "sec", floor (datetime (6)), + "min", datetime(5), "hour", datetime(4), + "mday", datetime(3), "mon", datetime(2)-1, + "year", datetime(1)-1900); + timeval.usec = 1e6*(datetime(6)-timeval.sec); x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ... timeval.usec * 1e-6; - else + catch %# store it as is x(indj, indk) = the_line{indk}; - endif + end_try_catch else x(indj, indk) = the_line{indk}; endif @@ -349,10 +346,10 @@ indj = df._cnt(2)+(1:size (x, 2)); else %# at this point, reading some filename failed - error("dataframe: can't open '%s' for reading data", x); + error ("dataframe: can't open '%s' for reading data", x); endif; - if (iscell(x)) + if (iscell (x)) if (2 == length (x)) %# use the intermediate value as destination column [indc, ncol] = df_name2idx (df._name{2}, x{1}, df._cnt(2), "column"); @@ -367,7 +364,7 @@ end_try_catch df = df_pad (df, 2, [length(dummy) indc], dummy); x = x{2}; - indj = indc + (1:size(x, 2)); %# redefine target range + indj = indc + (1:size (x, 2)); %# redefine target range else if (isa (x{1}, 'cell')) x = x{1}; %# remove one cell level @@ -385,7 +382,7 @@ if (1 == length (df._name{2}) && length (df._name{2}) < \ length (indj)) [df._name{2}(indj, 1), df._over{2}(1, indj)] ... - = df_colnames (char(df._name{2}), indj); + = df_colnames (char (df._name{2}), indj); elseif (length (df._name{2}) < indj(1) || isempty (df._name{2}(indj))) [df._name{2}(indj, 1), df._over{2}(1, indj)] ... 
= df_colnames (inputname(indi), indj); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-29 07:34:49
|
Revision: 10088
http://octave.svn.sourceforge.net/octave/?rev=10088&view=rev
Author: cdemills
Date: 2012-03-29 07:34:43 +0000 (Thu, 29 Mar 2012)
Log Message:
-----------
- Replaced a few else if by elseif

Modified Paths:
--------------
trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m

Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m
===================================================================
--- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-28 20:30:21 UTC (rev 10087)
+++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-29 07:34:43 UTC (rev 10088)
@@ -90,7 +90,7 @@
 endif

 %# default values
-seeked = []; trigger =[]; unquot = true; sep = "\t,"; cmt_lines = [];
+seeked = []; trigger = []; unquot = true; sep = "\t,"; cmt_lines = [];
 locales = "C"; datefmt = '';

 if (length (varargin) > 0)
@@ -125,8 +125,8 @@
 %# detect assignment - functions calls - ranges
 dummy = cellfun ('size', cellfun (@(x) strsplit (x, ":=("), df._name{2}, \
 "UniformOutput", false), 2);
- if (any(dummy > 1))
- warning('dataframe colnames taken literally and not interpreted');
+ if (any (dummy > 1))
+ warning ('dataframe colnames taken literally and not interpreted');
 endif
 df._name{2} = genvarname (df._name{2});
 df._over{2}(1, 1:length (df._name{2})) = false;
@@ -276,10 +276,11 @@
 the_line = cellfun (@(x) sscanf (x, "%f", locales), dummy, \
 'UniformOutput', false);
 else
- %# this code require a patch to src/file-io.cc in main
- %# Octave tree
+ %# this faster code requires a patch to src/file-io.cc in
+ %# main Octave tree
 the_line = sscanf (dummy, "%f", locales);
- the_line = cellfun (@(x) x{1}, the_line, 'UniformOutput', false);
+ the_line = cellfun (@(x) x{1}, the_line, \
+ 'UniformOutput', false);
 endif

 for indk = (1:size (the_line, 2))
@@ -302,40 +303,41 @@
 %# no conversion possible, store and remove leading space(s)
 x(indj, indk) = regexp (dummy{indk}, '[^ ].*', 'match');
 endif
+ elseif (~isempty (regexp (dummy{indk}, '[/:-]')) && ...
+ ~isempty (datefmt))
+ try
+ datetime = datevec (dummy{indk}, datefmt);
+ timeval = struct ("usec", 0, "sec", floor (datetime (6)),
+ "min", datetime(5), "hour", datetime(4),
+ "mday", datetime(3), "mon", datetime(2)-1,
+ "year", datetime(1)-1900);
+ timeval.usec = 1e6*(datetime(6)-timeval.sec);
+ x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ...
+ timeval.usec * 1e-6;
+ catch
+ %# store it as is
+ x(indj, indk) = the_line{indk};
+ end_try_catch
 else
- if (~isempty (regexp (dummy{indk}, '[/:-]')) && ...
- ~isempty (datefmt))
-
- try
- datetime = datevec (dummy{indk}, datefmt);
- timeval = struct ("usec", 0, "sec", floor (datetime (6)),
- "min", datetime(5), "hour", datetime(4),
- "mday", datetime(3), "mon", datetime(2)-1,
- "year", datetime(1)-1900);
- timeval.usec = 1e6*(datetime(6)-timeval.sec);
- x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ...
- timeval.usec * 1e-6;
- catch
- %# store it as is
- x(indj, indk) = the_line{indk};
- end_try_catch
- else
- x(indj, indk) = the_line{indk};
- endif
+ x(indj, indk) = the_line{indk};
 endif
 endfor
 indl = indl + 1; indj = indj + 1;
 endwhile
+
 if (~isempty (empty_lines))
 x(empty_lines, :) = [];
 endif
+
 %# detect empty columns
 empty_lines = find (0 == sum (cellfun ('size', x, 2)));
 if (~isempty (empty_lines))
 x(:, empty_lines) = [];
 endif
+
 clear UTF8_BOM fid in lines indl the_line content empty_lines
 clear timeval timestr nfields idx
+
 endif
 end_try_catch
 endif
@@ -365,11 +367,10 @@
 df = df_pad (df, 2, [length(dummy) indc], dummy);
 x = x{2};
 indj = indc + (1:size (x, 2)); %# redefine target range
- else
- if (isa (x{1}, 'cell'))
- x = x{1}; %# remove one cell level
- endif
+ elseif (isa (x{1}, 'cell'))
+ x = x{1}; %# remove one cell level
 endif
+
 if (length (df._name{2}) < indj(1) || isempty (df._name{2}(indj)))
 [df._name{2}(indj, 1), df._over{2}(1, indj)] ...
 = df_colnames (inputname(indi), indj);
@@ -377,19 +378,19 @@
 endif
 %# allow overwriting of column names
 df._over{2}(1, indj) = true;
- else
- if (~isempty (indj))
- if (1 == length (df._name{2}) && length (df._name{2}) < \
- length (indj))
- [df._name{2}(indj, 1), df._over{2}(1, indj)] ...
- = df_colnames (char (df._name{2}), indj);
- elseif (length (df._name{2}) < indj(1) || isempty (df._name{2}(indj)))
- [df._name{2}(indj, 1), df._over{2}(1, indj)] ...
- = df_colnames (inputname(indi), indj);
- endif
- df._name{2} = genvarname (df._name{2});
+
+ elseif (~isempty (indj))
+ if (1 == length (df._name{2}) && length (df._name{2}) < \
+ length (indj))
+ [df._name{2}(indj, 1), df._over{2}(1, indj)] ...
+ = df_colnames (char (df._name{2}), indj);
+ elseif (length (df._name{2}) < indj(1) || isempty (df._name{2}(indj)))
+ [df._name{2}(indj, 1), df._over{2}(1, indj)] ...
+ = df_colnames (inputname(indi), indj);
 endif
+ df._name{2} = genvarname (df._name{2});
 endif
+
 if (~isempty (indj))
 %# the exact row size will be determined latter
 idx.subs = {'', indj};

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-29 15:09:35
|
Revision: 10095
http://octave.svn.sourceforge.net/octave/?rev=10095&view=rev
Author: cdemills
Date: 2012-03-29 15:09:25 +0000 (Thu, 29 Mar 2012)
Log Message:
-----------
- If the datefmt contains spaces, then concatenate more than one field.

Modified Paths:
--------------
trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m

Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m
===================================================================
--- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-29 15:08:03 UTC (rev 10094)
+++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-29 15:09:25 UTC (rev 10095)
@@ -159,6 +159,14 @@
 endwhile
 endif

+if (~isempty (datefmt))
+ %# replace consecutive spaces by one
+ datefmt = regexprep (datefmt, '[ ]+', ' ');
+ datefields = 1 + length (regexp (datefmt, ' '));
+else
+ datefields = 1;
+endif
+
 if (~isempty (seeked) && ~isempty (trigger))
 error ('seeked and trigger are mutuallly incompatible arguments');
 endif
@@ -283,45 +291,62 @@
 'UniformOutput', false);
 endif

- for indk = (1:size (the_line, 2))
+ indk = 1; indm = 1;
+ while (indk <= size (the_line, 2))
 if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1))
 %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif
 if (unquot)
 try
 %# remove quotes and leading space(s)
- x(indj, indk) = regexp (dummy{indk}, '[^'' ].*[^'']', 'match'){1};
+ x(indj, indm) = regexp (dummy{indk}, '[^'' ].*[^'']', 'match'){1};
 catch
 %# if the previous test fails, try a simpler one
 in = regexp (dummy{indk}, '[^'' ]+', 'match');
 if (~isempty (in))
- x(indj, indk) = in{1};
+ x(indj, indm) = in{1};
 %# else
 %# x(indj, indk) = [];
 endif
 end_try_catch
 else
 %# no conversion possible, store and remove leading space(s)
- x(indj, indk) = regexp (dummy{indk}, '[^ ].*', 'match');
+ x(indj, indm) = regexp (dummy{indk}, '[^ ].*', 'match');
 endif
 elseif (~isempty (regexp (dummy{indk}, '[/:-]')) && ...
 ~isempty (datefmt))
+ %# does it look like a date ?
+ datetime = dummy{indk};
+
+ if (datefields > 1)
+ %# concatenate the required number of fields
+ indc = 1;
+ for indc = (2:datefields)
+ datetime = cstrcat(datetime, ' ', dummy{indk+indc-1});
+ endfor
+ endif
+
 try
- datetime = datevec (dummy{indk}, datefmt);
+ datetime = datevec (datetime, datefmt);
 timeval = struct ("usec", 0, "sec", floor (datetime (6)),
 "min", datetime(5), "hour", datetime(4),
 "mday", datetime(3), "mon", datetime(2)-1,
 "year", datetime(1)-1900);
- timeval.usec = 1e6*(datetime(6)-timeval.sec);
- x(indj, indk) = str2num (strftime ([char(37) 's'], timeval)) + ...
+ timeval.usec = 1e6*(datetime(6) - timeval.sec);
+ x(indj, indm) = str2num (strftime ([char(37) 's'], timeval)) + ...
 timeval.usec * 1e-6;
+ if (datefields > 1)
+ %# skip fields successfully converted
+ indk = indk + (datefields - 1);
+ endif
 catch
 %# store it as is
- x(indj, indk) = the_line{indk};
+ x(indj, indm) = the_line{indk};
 end_try_catch
 else
- x(indj, indk) = the_line{indk};
+ x(indj, indm) = the_line{indk};
 endif
- endfor
+ indk = indk + 1; indm = indm + 1;
+ endwhile
 indl = indl + 1; indj = indj + 1;
 endwhile

@@ -336,12 +361,12 @@
 endif

 clear UTF8_BOM fid in lines indl the_line content empty_lines
- clear timeval timestr nfields idx
+ clear datetime timeval idx

 endif
 end_try_catch
 endif
-
+
 %# fallback, avoiding a recursive call
 idx.type = '()';
 if (~isa (x, 'char'))

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cde...@us...> - 2012-03-29 15:21:56
|
Revision: 10096
http://octave.svn.sourceforge.net/octave/?rev=10096&view=rev
Author: cdemills
Date: 2012-03-29 15:21:47 +0000 (Thu, 29 Mar 2012)
Log Message:
-----------
- Take more than one field for datefmt iff sep contains space

Modified Paths:
--------------
trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m

Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m
===================================================================
--- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-29 15:09:25 UTC (rev 10095)
+++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2012-03-29 15:21:47 UTC (rev 10096)
@@ -162,7 +162,12 @@
 if (~isempty (datefmt))
 %# replace consecutive spaces by one
 datefmt = regexprep (datefmt, '[ ]+', ' ');
- datefields = 1 + length (regexp (datefmt, ' '));
+ %# is "space" used as separator ? Then we may take more than one field.
+ if (~isempty (regexp (sep, ' ')))
+ datefields = 1 + length (regexp (datefmt, ' '));
+ else
+ datefields = 1;
+ endif
 else
 datefields = 1;
 endif
@@ -323,6 +328,9 @@
 for indc = (2:datefields)
 datetime = cstrcat(datetime, ' ', dummy{indk+indc-1});
 endfor
+ else
+ %# ensure spaces are unique
+ datetime = regexprep (datetime, '[ ]+', ' ');
 endif

 try

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |