From: <prn...@us...> - 2012-04-18 20:04:36
|
Revision: 10277 http://octave.svn.sourceforge.net/octave/?rev=10277&view=rev Author: prnienhuis Date: 2012-04-18 20:04:29 +0000 (Wed, 18 Apr 2012) Log Message: ----------- Added getUsedRange() method for JOD 1.3x and above Modified Paths: -------------- trunk/octave-forge/main/io/inst/getusedrange.m Modified: trunk/octave-forge/main/io/inst/getusedrange.m =================================================================== --- trunk/octave-forge/main/io/inst/getusedrange.m 2012-04-18 16:20:26 UTC (rev 10276) +++ trunk/octave-forge/main/io/inst/getusedrange.m 2012-04-18 20:04:29 UTC (rev 10277) @@ -67,7 +67,7 @@ ## 2011-09-08 Style & layout updates ## 2012-01-26 Fixed "seealso" help string ## -## Last subfunc update: 2012-04-09 (JOD) +## Last subfunc update: 2012-04-18 (JOD) function [ trow, lrow, lcol, rcol ] = getusedrange (spptr, ii) @@ -166,11 +166,11 @@ reprows = 0; endif - # Get leftmost cell column number + # Get leftmost cell column number lcell = row.getFirstChild (); cl_char = char (lcell); - # Swap the following lines into comment to catch a jOpenDocument bug which foobars OTK - # (JOD doesn't set <office:value-type='string'> attribute when writing strings + # Swap the following lines into comment to catch a jOpenDocument bug which foobars OTK + # (JOD doesn't set <office:value-type='string'> attribute when writing strings #if (isempty (findstr ('office:value-type', cl_char)) || isempty (findstr ('<text:', cl_char))) if (isempty (findstr ('office:value-type', cl_char))) lcol = min (lcol, lcell.getTableNumberColumnsRepeatedAttribute () + 1); @@ -232,124 +232,151 @@ ## Created: 2010-05-25 ## Last updates: ## 2010-05-31 Fixed ignoring table-covered-cells; fixed count of sheets comprising just A1:A1 -## Added option for wsh being a string argument +## '' Added option for wsh being a string argument ## 2010-08-12 Little textual adaptations ## 2010-11-13 Catched jOpenDocument bug (1.2bx) where string cells have no office:value-type ## '' attrb set (by JOD). Somehow OTK is more robust as it catches these cells ## 2012-04-09 Fixed rowrepcnt (str2num not applied to tablerow) +## 2012-04-18 Added getUsedRange() method for JOD 1.3x and above function [ trow, brow, lcol, rcol ] = getusedrange_jod (ods, wsh) - # This function works by virtue of sheets in JOD actually being a Java string. - # It works outside of the Java memory/heap space which is an added benefit... - # (Read: this is one big dirty hack...... prone to crash Java on BIG spreadsheets) + # This function works by virtue of sheets in JOD actually being a Java string. + # It works outside of the Java memory/heap space which is an added benefit... + # (Read: this is one big dirty hack...... prone to crash Java on BIG spreadsheets) - if (isnumeric (wsh)) - sh = char (ods.workbook.getSheet (wsh - 1)); - else - sh = char (ods.workbook.getSheet (wsh)); - endif + if (isnumeric (wsh)) + sh = ods.workbook.getSheet (wsh - 1); + else + sh = ods.workbook.getSheet (wsh); + endif - # Get table-row pointers - id_trow = strfind (sh, '<table:table-row'); - id = strfind (sh, '</table:table>') - 1; - id_trow = [id_trow id]; + try + # Let's see if we have JOD v. 1.3x. If not, next call fails & we'll fall back to the old hack + sh_rng = char (sh.getUsedRange ()); + if (isempty (sh_rng)) + # Empty sheet + trow = brow = lcol = rcol = 0; + else + # Strip sheet name + sh_rng = sh_rng(length (sh.getName) + 2 : end); + # Get rid of period + sh_rng = strrep (sh_rng, '.', ''); + [~, nr, nc, trow, lcol] = parse_sp_range (sh_rng); + brow = trow + nr - 1; + rcol = lcol + nc - 1; + endif + return + + catch + # This function works by virtue of sheets in JOD actually being a Java string. + # It works outside of the Java memory/heap space which is an added benefit... + # (Read: this is one big dirty hack...... prone to crash Java on BIG spreadsheets) - trow = rcol = 0; - lcol = 1024; brow = 0; - if (~isempty (id)) - # Loop over all table-rows - rowrepcnt = 0; - for irow = 1:length (id_trow)-1 - # Isolate single table-row - tablerow = sh(id_trow(irow):id_trow(irow+1)-1); - # Search table-cells. table-c covers both table-cell & table-covered-cell - id_tcell = strfind (tablerow, '<table:table-c'); - id_tcell = [id_tcell id]; - rowl = length (tablerow); - if (isempty (id_tcell(1:end-1))) - rowend = rowl; - else - rowend = id_tcell(1); - endif - # Add in table-number-rows-repeated attribute values - rowrept = strfind (tablerow(1:rowend), 'number-rows-repeated'); - if (~isempty (rowrept)) - [st, en] = regexp (tablerow(rowrept:min (rowend, rowrept+30)), '\d+'); - rowrepcnt += str2num (tablerow(rowrept+st-1:min (rowend, rowrept+en-1))) - 1; - endif + sh = char (sh); - # Search table-cells. table-c is a table-covered-cell that is considered empty - id_tcell = strfind (tablerow, '<table:table-c'); - if (~isempty (id_tcell)) - # OK, this row has a value cell. Now table-covered-cells must be included. - id_tcell2 = strfind (tablerow, '<table:covered-t'); - if (~isempty (id_tcell2)) id_tcell = sort ([id_tcell id_tcell2]); endif - id_tcell = [id_tcell rowl]; - # Search for non-empty cells (i.e., with an office:value-type attribute). But: - # jOpenDocument 1.2b3 has a bug: it often doesn't set this attr for string cells - id_valtcell = strfind (tablerow, 'office:value-type='); - id_textonlycell = strfind (tablerow, '<text:'); - id_valtcell = sort ([id_valtcell id_textonlycell]); - id_valtcell = [id_valtcell rowl]; - if (~isempty (id_valtcell(1:end-1))) - brow = irow + rowrepcnt; - # First set trow if it hadn't already been found - if (~trow) trow = irow; endif - # Search for repeated table-cells - id_reptcell = strfind (tablerow, 'number-columns-repeated'); - id_reptcell = [id_reptcell rowl]; - # Search for leftmost non-empty table-cell. llcol = counter for this table-row - llcol = 1; - while (id_tcell (llcol) < id_valtcell(1) && llcol <= length (id_tcell) - 1) - ++llcol; - endwhile - --llcol; - # Compensate for repeated cells. First count all repeats left of llcol - ii = 1; - repcnt = 0; - if (~isempty (id_reptcell(1:end-1))) - # First try lcol - while (ii <= length (id_reptcell) - 1 && id_reptcell(ii) < id_valtcell(1)) - # Add all repeat counts left of leftmost data tcell minus 1 for each - [st, en] = regexp (tablerow(id_reptcell(ii):id_reptcell(ii)+30), '\d+'); - repcnt += str2num (tablerow(id_reptcell(ii)+st-1:id_reptcell(ii)+en-1)) - 1; - ++ii; - endwhile - # Next, add current repcnt value to llcol and update lcol - lcol = min (lcol, llcol + repcnt); - # Get last value table-cell in table-cell idx - jj = 1; - while (id_tcell (jj) < id_valtcell(length (id_valtcell)-1)) - ++jj; - endwhile + # Get table-row pointers + id_trow = strfind (sh, '<table:table-row'); + id = strfind (sh, '</table:table>') - 1; + id_trow = [id_trow id]; - # Get rest of column repeat counts in value table-cell range - while (ii < length (id_reptcell) && id_reptcell(ii) < id_tcell(jj)) - # Add all repeat counts minus 1 for each tcell in value tcell range - [st, en] = regexp (tablerow(id_reptcell(ii):id_reptcell(ii)+30), '\d+'); - repcnt += str2num (tablerow(id_reptcell(ii)+st-1:id_reptcell(ii)+en-1)) - 1; - ++ii; - endwhile - else - # In case left column = A - lcol = min (lcol, llcol); - endif - # Count all table-cells in value table-cell-range - ii = 1; # Indexes cannot be negative - while (ii < length (id_tcell) && id_tcell(ii) < id_valtcell(length (id_valtcell) - 1)) - ++ii; - endwhile - --ii; - rcol = max (rcol, ii + repcnt); - endif - endif - endfor - else - # No data found, empty sheet - lcol = rcol = brow = trow = 0; - endif + trow = rcol = 0; + lcol = 1024; brow = 0; + if (~isempty (id)) + # Loop over all table-rows + rowrepcnt = 0; + for irow = 1:length (id_trow)-1 + # Isolate single table-row + tablerow = sh(id_trow(irow):id_trow(irow+1)-1); + # Search table-cells. table-c covers both table-cell & table-covered-cell + id_tcell = strfind (tablerow, '<table:table-c'); + id_tcell = [id_tcell id]; + rowl = length (tablerow); + if (isempty (id_tcell(1:end-1))) + rowend = rowl; + else + rowend = id_tcell(1); + endif + # Add in table-number-rows-repeated attribute values + rowrept = strfind (tablerow(1:rowend), 'number-rows-repeated'); + if (~isempty (rowrept)) + [st, en] = regexp (tablerow(rowrept:min (rowend, rowrept+30)), '\d+'); + rowrepcnt += str2num (tablerow(rowrept+st-1:min (rowend, rowrept+en-1))) - 1; + endif + # Search table-cells. table-c is a table-covered-cell that is considered empty + id_tcell = strfind (tablerow, '<table:table-c'); + if (~isempty (id_tcell)) + # OK, this row has a value cell. Now table-covered-cells must be included. + id_tcell2 = strfind (tablerow, '<table:covered-t'); + if (~isempty (id_tcell2)) id_tcell = sort ([id_tcell id_tcell2]); endif + id_tcell = [id_tcell rowl]; + # Search for non-empty cells (i.e., with an office:value-type attribute). But: + # jOpenDocument 1.2b3 has a bug: it often doesn't set this attr for string cells + id_valtcell = strfind (tablerow, 'office:value-type='); + id_textonlycell = strfind (tablerow, '<text:'); + id_valtcell = sort ([id_valtcell id_textonlycell]); + id_valtcell = [id_valtcell rowl]; + if (~isempty (id_valtcell(1:end-1))) + brow = irow + rowrepcnt; + # First set trow if it hadn't already been found + if (~trow) trow = irow; endif + # Search for repeated table-cells + id_reptcell = strfind (tablerow, 'number-columns-repeated'); + id_reptcell = [id_reptcell rowl]; + # Search for leftmost non-empty table-cell. llcol = counter for this table-row + llcol = 1; + while (id_tcell (llcol) < id_valtcell(1) && llcol <= length (id_tcell) - 1) + ++llcol; + endwhile + --llcol; + # Compensate for repeated cells. First count all repeats left of llcol + ii = 1; + repcnt = 0; + if (~isempty (id_reptcell(1:end-1))) + # First try lcol + while (ii <= length (id_reptcell) - 1 && id_reptcell(ii) < id_valtcell(1)) + # Add all repeat counts left of leftmost data tcell minus 1 for each + [st, en] = regexp (tablerow(id_reptcell(ii):id_reptcell(ii)+30), '\d+'); + repcnt += str2num (tablerow(id_reptcell(ii)+st-1:id_reptcell(ii)+en-1)) - 1; + ++ii; + endwhile + # Next, add current repcnt value to llcol and update lcol + lcol = min (lcol, llcol + repcnt); + # Get last value table-cell in table-cell idx + jj = 1; + while (id_tcell (jj) < id_valtcell(length (id_valtcell)-1)) + ++jj; + endwhile + + # Get rest of column repeat counts in value table-cell range + while (ii < length (id_reptcell) && id_reptcell(ii) < id_tcell(jj)) + # Add all repeat counts minus 1 for each tcell in value tcell range + [st, en] = regexp (tablerow(id_reptcell(ii):id_reptcell(ii)+30), '\d+'); + repcnt += str2num (tablerow(id_reptcell(ii)+st-1:id_reptcell(ii)+en-1)) - 1; + ++ii; + endwhile + else + # In case left column = A + lcol = min (lcol, llcol); + endif + # Count all table-cells in value table-cell-range + ii = 1; # Indexes cannot be negative + while (ii < length (id_tcell) && id_tcell(ii) < id_valtcell(length (id_valtcell) - 1)) + ++ii; + endwhile + --ii; + rcol = max (rcol, ii + repcnt); + endif + endif + endfor + else + # No data found, empty sheet + lcol = rcol = brow = trow = 0; + endif + + end_try_catch + endfunction @@ -413,7 +440,7 @@ erow = ecol = 0; for ii=1:numel (adrblks) # Check if address contains a sheet name in quotes (happens if name contains a period) - if (int8 (adrblks{ii}(1)) == 39) + if (int8 (adrblks{ii}(1)) == 39) # Strip sheet name part idx = findstr (adrblks{ii}, "'."); range = adrblks{ii}(idx+2 : end); @@ -455,30 +482,30 @@ function [ trow, brow, lcol, rcol ] = getusedrange_com (xls, ii) - sh = xls.workbook.Worksheets (ii); - - # Decipher used range. Beware, UsedRange() returns *cached* rectangle of - # all spreadsheet cells containing *anything*, including just formatting - # (i.e., empty cells are included too). ==> This is an approximation only - allcells = sh.UsedRange; - - # Get top left cell as a Range object - toplftcl = allcells.Columns(1).Rows(1); - - # Count number of rows & cols in virtual range from A1 to top left cell - lcol = sh.Range ("A1", toplftcl).columns.Count; - trow = sh.Range ("A1", toplftcl).rows.Count; - - # Add real occupied rows & cols to obtain end row & col - brow = trow + allcells.rows.Count() - 1; - rcol = lcol + allcells.columns.Count() - 1; - - # Check if there are real data - if ((lcol == rcol) && (trow = brow)) - if (isempty (toplftcl.Value)) - trow = brow = lcol = rcol = 0; - endif - endif + sh = xls.workbook.Worksheets (ii); + + # Decipher used range. Beware, UsedRange() returns *cached* rectangle of + # all spreadsheet cells containing *anything*, including just formatting + # (i.e., empty cells are included too). ==> This is an approximation only + allcells = sh.UsedRange; + + # Get top left cell as a Range object + toplftcl = allcells.Columns(1).Rows(1); + + # Count number of rows & cols in virtual range from A1 to top left cell + lcol = sh.Range ("A1", toplftcl).columns.Count; + trow = sh.Range ("A1", toplftcl).rows.Count; + + # Add real occupied rows & cols to obtain end row & col + brow = trow + allcells.rows.Count() - 1; + rcol = lcol + allcells.columns.Count() - 1; + + # Check if there are real data + if ((lcol == rcol) && (trow = brow)) + if (isempty (toplftcl.Value)) + trow = brow = lcol = rcol = 0; + endif + endif endfunction @@ -505,35 +532,35 @@ function [ trow, brow, lcol, rcol ] = getusedrange_poi (xls, ii) - persistent cblnk; cblnk = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_BLANK'); + persistent cblnk; cblnk = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_BLANK'); - sh = xls.workbook.getSheetAt (ii-1); # Java POI starts counting at 0 + sh = xls.workbook.getSheetAt (ii-1); # Java POI starts counting at 0 - trow = sh.getFirstRowNum (); # 0-based - brow = sh.getLastRowNum (); # 0-based - # Get column range - lcol = 1048577; # OOXML (xlsx) max. + 1 - rcol = 0; - botrow = brow; - for jj=trow:brow - irow = sh.getRow (jj); - if (~isempty (irow)) - scol = (irow.getFirstCellNum).intValue (); - lcol = min (lcol, scol); - ecol = (irow.getLastCellNum).intValue () - 1; - rcol = max (rcol, ecol); - # Keep track of lowermost non-empty row as getLastRowNum() is unreliable - if ~(irow.getCell(scol).getCellType () == cblnk && irow.getCell(ecol).getCellType () == cblnk) - botrow = jj; - endif - endif - endfor - if (lcol > 1048576) - # Empty sheet - trow = 0; brow = 0; lcol = 0; rcol = 0; - else - brow = min (brow, botrow) + 1; ++trow; ++lcol; ++rcol; # 1-based return values - endif + trow = sh.getFirstRowNum (); # 0-based + brow = sh.getLastRowNum (); # 0-based + # Get column range + lcol = 1048577; # OOXML (xlsx) max. + 1 + rcol = 0; + botrow = brow; + for jj=trow:brow + irow = sh.getRow (jj); + if (~isempty (irow)) + scol = (irow.getFirstCellNum).intValue (); + lcol = min (lcol, scol); + ecol = (irow.getLastCellNum).intValue () - 1; + rcol = max (rcol, ecol); + # Keep track of lowermost non-empty row as getLastRowNum() is unreliable + if ~(irow.getCell(scol).getCellType () == cblnk && irow.getCell(ecol).getCellType () == cblnk) + botrow = jj; + endif + endif + endfor + if (lcol > 1048576) + # Empty sheet + trow = 0; brow = 0; lcol = 0; rcol = 0; + else + brow = min (brow, botrow) + 1; ++trow; ++lcol; ++rcol; # 1-based return values + endif endfunction @@ -561,33 +588,33 @@ function [ trow, brow, lcol, rcol ] = getusedrange_jxl (xls, wsh) - persistent emptycell = (java_get ('jxl.CellType', 'EMPTY')).toString (); + persistent emptycell = (java_get ('jxl.CellType', 'EMPTY')).toString (); - sh = xls.workbook.getSheet (wsh - 1); # JXL sheet count 0-based + sh = xls.workbook.getSheet (wsh - 1); # JXL sheet count 0-based - brow = sh.getRows (); - rcol = sh.getColumns (); - - if (brow == 0 || rcol == 0) - # Empty sheet - trow = 0; lcol = 0; brow = 0; rcol = 0; - else - trow = brow + 1; - lcol = rcol + 1; - for ii=0:brow-1 # For loop coz we must check ALL rows for leftmost column - emptyrow = 1; - jj = 0; - while (jj < rcol && emptyrow) # While loop => only til first non-empty cell - cell = sh.getCell (jj, ii); - if ~(strcmp (char (cell.getType ()), emptycell)) - lcol = min (lcol, jj + 1); - emptyrow = 0; - endif - ++jj; - endwhile - if ~(emptyrow) trow = min (trow, ii + 1); endif - endfor - endif + brow = sh.getRows (); + rcol = sh.getColumns (); + + if (brow == 0 || rcol == 0) + # Empty sheet + trow = 0; lcol = 0; brow = 0; rcol = 0; + else + trow = brow + 1; + lcol = rcol + 1; + for ii=0:brow-1 # For loop coz we must check ALL rows for leftmost column + emptyrow = 1; + jj = 0; + while (jj < rcol && emptyrow) # While loop => only til first non-empty cell + cell = sh.getCell (jj, ii); + if ~(strcmp (char (cell.getType ()), emptycell)) + lcol = min (lcol, jj + 1); + emptyrow = 0; + endif + ++jj; + endwhile + if ~(emptyrow) trow = min (trow, ii + 1); endif + endfor + endif endfunction @@ -616,18 +643,18 @@ function [ trow, brow, lcol, rcol ] = getusedrange_oxs (xls, wsh) - sh = xls.workbook.getWorkSheet (wsh - 1); - try - # Intriguing: sh.getFirst<> is off by one, sh.getLast<> = OK.... 8-Z - trow = sh.getFirstRow () + 1; - brow = sh.getLastRow (); - lcol = sh.getFirstCol () + 1; - rcol = sh.getLastCol (); - catch - # Might be an empty sheet - trow = brow = lcol = rcol = 0; - end_try_catch - # Check for empty sheet - if ((trow > brow) || (lcol > rcol)), trow = brow = lcol = rcol = 0; endif + sh = xls.workbook.getWorkSheet (wsh - 1); + try + # Intriguing: sh.getFirst<> is off by one, sh.getLast<> = OK.... 8-Z + trow = sh.getFirstRow () + 1; + brow = sh.getLastRow (); + lcol = sh.getFirstCol () + 1; + rcol = sh.getLastCol (); + catch + # Might be an empty sheet + trow = brow = lcol = rcol = 0; + end_try_catch + # Check for empty sheet + if ((trow > brow) || (lcol > rcol)), trow = brow = lcol = rcol = 0; endif endfunction This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |