From: <cde...@us...> - 2010-09-30 10:11:37
|
Revision: 7782
http://octave.svn.sourceforge.net/octave/?rev=7782&view=rev
Author: cdemills
Date: 2010-09-30 10:11:30 +0000 (Thu, 30 Sep 2010)

Log Message:
-----------
- added a 'sep' argument for the elements separator
- when splitting into lines, do a second pass to remove "\n"
- use strsplit instead of a regexp, to permit empty fields
- empty fields must not be stored in x
- remove empty lines and column before further processing

Modified Paths:
--------------
trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m

Modified: trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m
===================================================================
--- trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-29 20:11:16 UTC (rev 7781)
+++ trunk/octave-forge/extra/dataframe/inst/@dataframe/dataframe.m 2010-09-30 10:11:30 UTC (rev 7782)
@@ -29,6 +29,7 @@
 %# Each preceeding line is silently skipped. Default: none
 %# @item unquot: a logical switch telling wheter or not strings should
 %# be unquoted before storage, default = true;
+ %# @item sep: the elements separator, default ','
 %# @end itemize
 %# The remaining data are concatenanted (right-appended) to the existing ones.
 %# @end deftypefn
@@ -82,7 +83,7 @@
 df = dataframe([]); %# get the right fields
 endif
-seeked = []; unquot = true;
+seeked = []; unquot = true; sep = ',';
 if length(varargin) > 0,
 indi = 1;
@@ -112,7 +113,7 @@
 df._name{2} = cellstr(num2str(varargin{indi+1}));
 endswitch
 %# detect assignment - functions calls - ranges
- dummy = cellfun('size', cellfun(@(x) strsplit(x, ':=('), df._name{2}, \
+ dummy = cellfun('size', cellfun(@(x) strsplit(x, ":=("), df._name{2}, \
 "UniformOutput", false), 2);
 if any(dummy > 1),
 warning('dataframe colnames taken literally and not interpreted');
@@ -125,6 +126,9 @@
 case 'unquot',
 unquot = varargin{indi + 1};
 varargin(indi:indi+1) = [];
+ case 'sep',
+ sep = varargin{indi + 1};
+ varargin(indi:indi+1) = [];
 otherwise %# FIXME: just skip it for now
 indi = indi + 1;
 endswitch
@@ -152,11 +156,18 @@
 fclose(fid);
 end_unwind_protect
 lines = regexp(in,'(^|\n)([^\n]+)', 'match'); %# cut into lines
+ lines = cellfun(@(x) regexp(x, '[^\n]*', 'match'), lines);
+ %#, \'UniformOutput', false);
+ %# remove \n
 %# a field either starts at a word boundary, either by + - . for
 %# a numeric data, either by ' for a string.
- content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]+(''|\b)', 'match'), \
- lines, 'UniformOutput', false); %# extract fields
- indl = 1; indj = 1; %# disp('line 151 '); keyboard
+
+ %# content = cellfun(@(x) regexp(x, '(\b|[-+\.''])[^,]*(''|\b)', 'match'),\
+ %# lines, 'UniformOutput', false); %# extract fields
+
+ content = cellfun(@(x) strsplit(x, sep), lines, \
+ 'UniformOutput', false); %# extract fields
+ indl = 1; indj = 1; %#disp('line 151 '); keyboard
 if ~isempty(seeked),
 while indl <= length(lines),
 dummy = content{indl};
@@ -169,8 +180,14 @@
 dummy = content{indl};
 endif
 x = cell(1+length(lines)-indl, size(dummy, 2));
+ empty_lines = [];
 while indl <= length(lines),
 dummy = content{indl};
+ if all(cellfun('size', dummy, 2) == 0),
+ empty_lines = [empty_lines indj];
+ indl = indl + 1; indj = indj + 1;
+ continue;
+ endif
 %# try to convert to float
 the_line = cellfun(@(x) sscanf(x, "%f"), dummy, ...
 'UniformOutput', false);
@@ -185,8 +202,8 @@
 in = regexp(dummy{indk}, '[^'']+', 'match');
 if !isempty(in),
 x(indj, indk) = in{1};
- else
- x(indj, indk) = [];
+ %# else
+ %# x(indj, indk) = [];
 endif
 end_try_catch
 else
@@ -198,10 +215,18 @@
 endfor
 indl = indl + 1; indj = indj + 1;
 endwhile
- clear UTF8_BOM fid in lines indl the_line content
+ if !isempty(empty_lines),
+ x(empty_lines, :) = [];
+ endif
+ %# detect empty columns
+ empty_lines = find(0 == sum(cellfun('size', x, 2)))
+ if !isempty(empty_lines),
+ x(:, empty_lines) = [];
+ endif
+ clear UTF8_BOM fid in lines indl the_line content empty_lines
 end_try_catch
 endif
-
+
 %# fallback, avoiding a recursive call
 idx.type = '()';
 indj = df._cnt(2)+(1:size(x, 2));

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |