Learn how easy it is to sync an existing GitHub or Google Code repo to a SourceForge project! See Demo

Close

Diff of /inst/@dataframe/dataframe.m [4130dc] .. [f24aa1] Maximize Restore

  Switch to side-by-side view

--- a/inst/@dataframe/dataframe.m
+++ b/inst/@dataframe/dataframe.m
@@ -110,7 +110,7 @@
 seeked = ''; trigger = ''; unquot = true; sep = "\t,"; cmt_lines = [];
 conv_regexp = {}; datefmt = ''; verbose = false;
 
-if (length (varargin) > 0)	%# extract known arguments
+if (length (varargin) > 0)      %# extract known arguments
   indi = 1;
   %# loop over possible arguments
   while (indi <= size (varargin, 2))
@@ -166,14 +166,14 @@
         case 'datefmt'
           datefmt = varargin{indi + 1};
           varargin(indi:indi+1) = [];
-	case 'verbose'
+        case 'verbose'
           verbose = varargin{indi + 1};
           varargin(indi:indi+1) = [];
-	case '--'
-	  %# stop processing args -- take the rest as filenames
-	  varargin(indi) = [];
-	  break;
-	otherwise %# FIXME: just skip it for now
+        case '--'
+          %# stop processing args -- take the rest as filenames
+          varargin(indi) = [];
+          break;
+        otherwise %# FIXME: just skip it for now
           disp (sprintf ("Ignoring unkown argument %s", varargin{indi}));
           indi = indi + 1;    
       endswitch
@@ -207,94 +207,96 @@
     if (isa (x, 'char') && size (x, 1) < 2)
       dummy = tilde_expand (x);
       try
-	%# read the data frame from a file
+        %# read the data frame from a file
         x = load (dummy);
         df._src{end+1, 1} = dummy;
       catch
         %# try our own method
         UTF8_BOM = char ([0xEF 0xBB 0xBF]);
-	%# Is it compressed ?
-	cmd = []; count = regexpi (dummy, '\.gz');
-	if (length (dummy) - count == 2)
-	  cmd = ['gzip -dc '];
-	else
-	  count = regexpi (dummy, '\.bz2');
-	  if (length (dummy) - count == 3)
-	    cmd = ['bzip2 -dc '];
-	  else
-	    count = regexpi (dummy, '\.xz');
-	    if (length (dummy) - count == 2)
-	      cmd = ['xz -dc '];
-	    else
-	      count = regexpi (dummy, '\.zip');
-	      if (length (dummy) - count == 3)
-		cmd = ['unzip -p '];
-	      else
-		count = regexpi (dummy, '\.lzo');
-		if (length (dummy) - count == 3)
-		  cmd = ['lzop -dc '];
-		endif
-	      endif
-	    endif
-	  endif
-	endif
-
-	if (isempty (cmd)) %# direct read
-	  [fid, msg] = fopen (dummy, 'rt');
-	else
-	  %# The file we read from external process must be seekable !!!
-	  tmpfile = tmpnam (); df._src{end+1, 1} = dummy;
-	  %# quote to protect from spaces in the name
-	  dummy = strcat ('''', dummy, '''');
-	  cmd = [cmd, dummy,  ' > ',  tmpfile];
-	  if (exist ('OCTAVE_VERSION', 'builtin'))
-	    [output, status] = system (cmd);
-	  else
-	    [status, output] = system (cmd);
-	  endif 
-	  if (not (0 == status))
-	    disp (sprintf ("%s exited with status %d", cmd, status));
-	  endif
-	  fid = fopen (tmpfile, 'rt');
-	  if (exist ('OCTAVE_VERSION', 'builtin'))
-	    [cmd, status] = unlink (tmpfile);
-	  else
-	    delete (tmpfile)
-	  endif
-	endif
-
+        %# Is it compressed ?
+        cmd = []; count = regexpi (dummy, '\.gz');
+        if (length (dummy) - count == 2)
+          cmd = ['gzip -dc '];
+        else
+          count = regexpi (dummy, '\.bz2');
+          if (length (dummy) - count == 3)
+            cmd = ['bzip2 -dc '];
+          else
+            count = regexpi (dummy, '\.xz');
+            if (length (dummy) - count == 2)
+              cmd = ['xz -dc '];
+            else
+              count = regexpi (dummy, '\.zip');
+              if (length (dummy) - count == 3)
+                cmd = ['unzip -p '];
+              else
+                count = regexpi (dummy, '\.lzo');
+                if (length (dummy) - count == 3)
+                  cmd = ['lzop -dc '];
+                endif
+              endif
+            endif
+          endif
+        endif
+        
+        if (isempty (cmd)) %# direct read
+          [fid, msg] = fopen (dummy, 'rt');
+        else
+          %# The file we read from external process must be seekable !!!
+          tmpfile = tmpnam (); df._src{end+1, 1} = dummy;
+          %# quote to protect from spaces in the name
+          dummy = strcat ('''', dummy, '''');
+          cmd = [cmd, dummy,  ' > ',  tmpfile];
+          if (exist ('OCTAVE_VERSION', 'builtin'))
+            [output, status] = system (cmd);
+          else
+            [status, output] = system (cmd);
+          endif 
+          if (not (0 == status))
+            disp (sprintf ("%s exited with status %d", cmd, status));
+          endif
+          fid = fopen (tmpfile, 'rt');
+          if (exist ('OCTAVE_VERSION', 'builtin'))
+            [cmd, status] = unlink (tmpfile);
+          else
+            delete (tmpfile)
+          endif
+        endif
+        
         unwind_protect
           in = [];
           if (fid ~= -1)
-	    dummy = fgetl (fid);
-	    if (-1 == dummy)
-	      x = []; %# file is valid but empty
-	    else  
+            dummy = fgetl (fid);
+            if (-1 == dummy)
+              x = []; %# file is valid but empty
+            else  
               if (~strcmp (dummy, UTF8_BOM))
-		frewind (fid);
+                frewind (fid);
               endif
               %# slurp everything and convert doubles to char, avoiding
               %# problems with char > 127
               in = char (fread (fid).');
-	    endif 
+            endif 
           endif
         unwind_protect_cleanup
           if (fid ~= -1) fclose (fid); endif
         end_unwind_protect
-
+        
         if (~isempty (in))
           %# explicit list taken from 'man pcrepattern' -- we enclose all
           %# vertical separators in case the underlying regexp engine
           %# doesn't have them all.
           eol = '(\r\n|\n|\v|\f|\r|\x85)';
           %# cut into lines -- include the EOL to have a one-to-one
-          %# matching between line numbers. Use a non-greedy match.
+            %# matching between line numbers. Use a non-greedy match.
           lines = regexp (in, ['.*?' eol], 'match');
-	  try
-            dummy = cellfun (@(x) regexp (x, eol), lines); 
-	  catch
-	    disp('line 245 -- binary garbage in the input file ? '); keyboard
-	  end
+          %# spare memory
+          clear in;
+          try
+            dummy =  cellfun (@(x) regexp (x, eol), lines); 
+          catch  
+            disp('line 245 -- binary garbage in the input file ? '); keyboard
+          end
           %# remove the EOL character(s)
           lines(1 == dummy) = {""};
           %# use a positive lookahead -- eol is not part of the match
@@ -313,104 +315,107 @@
             content = cellfun (@(x) strsplit (x, sep), lines, ...
                                'UniformOutput', false); %# extract fields 
           endif
+          %# spare memory
+          clear lines;
+
+          indl = 1; indj = 1; dummy = []; 
           
-	  indl = 1; indj = 1; %# disp('line 151 '); keyboard
-          
-	  if (~isempty (seeked))
-            while (indl <= length (lines))
+          if (~isempty (seeked))
+            while (indl <= length (content))
               dummy = content{indl};
               if (all (cellfun ('size', dummy, 2) == 0))
                 indl = indl + 1; 
                 continue;
               endif
               if (all (cellfun (@isempty, regexp (dummy, seeked, 'match')))) 
-		if (isempty (df._header))
-		  df._header =  dummy;
-		else
-		  df._header(end+1, 1:length (dummy)) = dummy;
-		endif
-		indl = indl + 1;
-		continue;
-	      endif
-	      break;
+                if (isempty (df._header))
+                  df._header =  dummy;
+                else
+                  df._header(end+1, 1:length (dummy)) = dummy;
+                endif
+                indl = indl + 1;
+                continue;
+              endif
+              break;
             endwhile
-          
-	  elseif (~isempty (trigger))
-            while (indl <= length (lines))
+          elseif (~isempty (trigger))
+            while (indl <= length (content))
               dummy = content{indl};
               indl = indl + 1;
               if (all (cellfun ('size', dummy, 2) == 0))
                 continue;
               endif
-	      if (isempty (df._header))
-		 df._header =  dummy;
-	      else
-		df._header(end+1, 1:length (dummy)) = dummy;
-	      endif
-	      if (all (cellfun (@isempty, regexp (dummy, trigger, 'match'))))
-		continue;	
-	      endif
-	      break;
+              if (isempty (df._header))
+                 df._header =  dummy;
+              else
+                df._header(end+1, 1:length (dummy)) = dummy;
+              endif
+              if (all (cellfun (@isempty, regexp (dummy, trigger, 'match'))))
+                continue;       
+              endif
+              break;
             endwhile
-          endif
-
-	  if (indl > length (lines))
-	     x = []; 
-	  else
-	    x = cell (1+length (lines)-indl, size (dummy, 2)); 
+          else
+            dummy = content{1}; %# rough guess
+          endif
+
+          if (indl > length (content))
+             x = []; 
+          else
+            x = cell (1+length (content)-indl, size (dummy, 2)); 
             empty_lines = []; cmt_lines = [];
-            while (indl <= length (lines))
+            while (indl <= length (content))
               dummy = content{indl};
               if (all (cellfun ('size', dummy, 2) == 0))
-		empty_lines = [empty_lines indj];
-		indl = indl + 1; indj = indj + 1;
-		continue;
+                empty_lines = [empty_lines indj];
+                indl = indl + 1; indj = indj + 1;
+                continue;
               endif
               %# does it looks like a comment line ?
               if (regexp (dummy{1}, ['^\s*' char(35)]))
-		empty_lines = [empty_lines indj];
-		cmt_lines = strvcat (cmt_lines, horzcat (dummy{:}));
-		indl = indl + 1; indj = indj + 1;
-		continue;
-              endif
-	      
-	      if (all (cellfun (@isempty, regexp (dummy, trigger, 'match'))))
-		%# it does not look like the trigger. Good.
-		%# try to convert to float
-		if (~ isempty(conv_regexp))
-		  dummy = regexprep (dummy, conv_regexp{});
-		endif
-		the_line = cellfun (@(x) sscanf (x, "%f"), dummy, ...
+                empty_lines = [empty_lines indj];
+                cmt_lines = strvcat (cmt_lines, horzcat (dummy{:}));
+                indl = indl + 1; indj = indj + 1;
+                continue;
+              endif
+              
+              if (all (cellfun (@isempty, regexp (dummy, trigger, 'match'))))
+                %# it does not look like the trigger. Good.
+                %# try to convert to float
+                if (~ isempty(conv_regexp))
+                  dummy = regexprep (dummy, conv_regexp{});
+                endif
+                the_line = cellfun (@(x) sscanf (x, "%f"), dummy, ...
                                     'UniformOutput', false);
-              	
-		indk = 1; indm = 1;
-		while (indk <= size (the_line, 2))
-		  if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1)) 
+                
+                indk = 1; indm = 1;
+                while (indk <= size (the_line, 2))
+                  if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1)) 
                     %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif
-		    if (isempty (dummy {indk}))
-		      %# empty field, just don't care
-		      indk = indk + 1; indm = indm + 1;
-		      continue;
-		    endif
+                    if (isempty (dummy {indk}))
+                      %# empty field, just don't care
+                      indk = indk + 1; indm = indm + 1;
+                      continue;
+                    endif
                     if (unquot)
                       try
-			%# remove quotes and leading space(s)
-			x(indj, indm) = regexp (dummy{indk}, '[^''" ].*[^''"]', 'match'){1};
+                        %# remove quotes and leading space(s)
+                        x(indj, indm) = regexp (dummy{indk}, '[^''" ].*[^''"]', 'match'){1};
                       catch
-			%# if the previous test fails, try a simpler one
-			in = regexp (dummy{indk}, '[^'' ]+', 'match');
-			if (~isempty (in))
-			  x(indj, indm) = in{1};
-			%# else
-			%#    x(indj, indk) = [];
-			endif
+                        %# if the previous test fails, try a simpler one
+                        in = regexp (dummy{indk}, '[^'' ]+', 'match');
+                        if (~isempty (in))
+                          x(indj, indm) = in{1};
+                        %# else
+                        %#    x(indj, indk) = [];
+                        endif
                       end_try_catch
                     else
                       %# no conversion possible, store and remove leading space(s)
                       x(indj, indm) = regexp (dummy{indk}, '[^ ].*', 'match');
                     endif
-		  elseif (~isempty (regexp (dummy{indk}, '[/:-]')) && ...
-			  ~isempty (datefmt))
+                  elseif (~isempty (regexp (dummy{indk}, '[/:-]')) && ...
+                          ~isempty (datefmt))
                     %# does it look like a date ?
                     datetime = dummy{indk}; 
                     
@@ -418,7 +423,7 @@
                       %# concatenate the required number of fields 
                       indc = 1;
                       for indc = (2:datefields)
-			datetime = cstrcat(datetime, ' ', dummy{indk+indc-1});
+                        datetime = cstrcat(datetime, ' ', dummy{indk+indc-1});
                       endfor
                     else
                       %# ensure spaces are unique
@@ -428,32 +433,32 @@
                     try
                       datetime = datevec (datetime, datefmt);
                       timeval = struct ("usec", 0, "sec", floor (datetime (6)),
-					"min", datetime(5), "hour", datetime(4),
-					"mday", datetime(3), "mon", datetime(2)-1,
-					"year", datetime(1)-1900);
+                                        "min", datetime(5), "hour", datetime(4),
+                                        "mday", datetime(3), "mon", datetime(2)-1,
+                                        "year", datetime(1)-1900);
                       timeval.usec = 1e6*(datetime(6) - timeval.sec);
                       x(indj, indm) =  str2num (strftime ([char(37) 's'], timeval)) + ...
-				       timeval.usec * 1e-6;
+                                       timeval.usec * 1e-6;
                       if (datefields > 1)
-			%# skip fields successfully converted
-			indk = indk + (datefields - 1);
+                        %# skip fields successfully converted
+                        indk = indk + (datefields - 1);
                       endif
                     catch
                       %# store it as is
                       x(indj, indm) = the_line{indk}; 
                     end_try_catch
-		  else
+                  else
                     x(indj, indm) = the_line{indk}; 
-		  endif
-		  indk = indk + 1; indm = indm + 1;
-		endwhile
-		indl = indl + 1; indj = indj + 1;
-	      else
-		%# trigger seen again. Throw last value and abort processing.
-		x(end, :) = [];
-		fprintf (2, "Trigger seen a second time, stopping processing\n");
-		break
-	      end
+                  endif
+                  indk = indk + 1; indm = indm + 1;
+                endwhile
+                indl = indl + 1; indj = indj + 1;
+              else
+                %# trigger seen again. Throw last value and abort processing.
+                x(end, :) = [];
+                fprintf (2, "Trigger seen a second time, stopping processing\n");
+                break
+              end
             endwhile
             
             if (~isempty (empty_lines))
@@ -466,8 +471,8 @@
               x(:, empty_lines) = [];
             endif
           endif
-	  
-          clear UTF8_BOM fid in lines indl the_line content empty_lines 
+          
+          clear UTF8_BOM fid indl the_line content empty_lines 
           clear datetime timeval idx count tmpfile cmd output status
 
         endif
@@ -531,13 +536,13 @@
       if (ndims (x) > 2), idx.subs{3} = 1:size (x, 3); endif
       %#      df = subsasgn(df, idx, x);        <= call directly lower level
       try
-	if (verbose)
-	   printf ("Calling df_matassign, orig size: %s\n", disp (size (df)));
-	   printf ("size(x): %s\n", disp (size (x)));
-	endif
-	df = df_matassign (df, idx, indj, length (indj), x, trigger);
+        if (verbose)
+           printf ("Calling df_matassign, orig size: %s\n", disp (size (df)));
+           printf ("size(x): %s\n", disp (size (x)));
+        endif
+        df = df_matassign (df, idx, indj, length (indj), x, trigger);
       catch
-	disp ('line 443 '); keyboard
+        disp ('line 443 '); keyboard
       end_try_catch
       if (~isempty (cmt_lines))
         df._cmt = vertcat (df._cmt, cellstr (cmt_lines));