I'm running a 17 MB HTML document through HTMLParser. I use Lexer.nextNode() in a loop to iterate over the entire document, writing it verbatim to an OutputStream except when I recognize a particular sequence of tags, which I then write out in a custom manner.
If I leave my maximum heap size at 64 MB, the program gives up with an OutOfMemoryError before finishing the transformation process. I can't account for this -- is there any good reason the Lexer should accumulate heap space as it progresses through the document?
Source code follows:
-----
import org.htmlparser.Node;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.ParserException;
import java.io.*;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ReportOutputFilter {
    private static final Pattern HrefPattern = Pattern.compile("row_number");

    public static void filterReportStream(InputStream in, OutputStream out)
        throws IOException
    {
        int success = 0;
        long rowCount = 0;
        long depth = -1;
        Page page = new Page(in, "UTF-8");
        Lexer lexer = new Lexer(page);
        Node node;
        Tag tag;
        Tag trTag = null, tdTag = null, aTag = null, imgTag = null;
        String tagName;
        Date startDate, endDate;
        long elapsed;
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
        try {
            startDate = new Date();
            while (null != (node = lexer.nextNode())) {
                if (node instanceof Tag) {
                    tag = (Tag) node;
                    tagName = tag.getTagName();
                    if (tag.isEndTag()) {
                        writeNode(writer, depth, node, false);
                        depth--;
                        continue;
                    }
                    depth++;
                    if ("TR".equals(tagName)) {
                        trTag = tag;
                        success = 1;
                    }
                    else if ("TD".equals(tagName) && success == 1) {
                        tdTag = tag;
                        success = 2;
                    }
                    else if ("A".equals(tagName) && success == 2) {
                        aTag = tag;
                        success = 3;
                    }
                    else if ("IMG".equals(tagName) && success == 3) {
                        imgTag = tag;
                        // Full success, row tags found
                        // Write the whole bunch of opener tags to the output
                        writeReportRowOpeners(writer, depth, rowCount, trTag, tdTag, aTag, imgTag);
                        rowCount++;
                        trTag = tdTag = aTag = imgTag = null;
                        success = 0;
                    }
                    else {
                        trTag = tdTag = aTag = imgTag = null;
                        success = 0;
                        writeNode(writer, depth, node, false);
                    }
                    if (tag.isEmptyXmlTag() || tagName.equals("META") || tagName.equals("IMG"))
                        depth--;
                }
                else {
                    writeNode(writer, depth, node, false);
                }
            }
            endDate = new Date();
            elapsed = endDate.getTime() - startDate.getTime();
            System.out.println("Elapsed time PREPARING nodes: " + elapsed + " milliseconds.");
        }
        catch (ParserException pe) {
            System.err.println("F***! A problem occurred while parsing.");
        }
        writer.flush();
    }

    private static void writeNode(Writer writer, long depth, Node node, boolean newline)
        throws IOException
    {
        writer.write(node.toHtml());
        if (newline) writer.write("\n");
    }

    private static void writeReportRowOpeners(Writer writer, long depth, long rowCount, Tag trTag, Tag tdTag, Tag aTag, Tag imgTag)
        throws IOException
    {
        String fixedHref;
        Matcher m = HrefPattern.matcher(aTag.getAttribute("href"));
        fixedHref = m.replaceFirst(String.valueOf(rowCount));
        writer.write("<tr>\n");
        writer.write("<td style=\"" + tdTag.getAttribute("style") + "\" class=\"" + tdTag.getAttribute("class") + "\">\n");
        writer.write("<a href=\"" + fixedHref + "\" style=\"" + aTag.getAttribute("style") + "\" class=\"" + aTag.getAttribute("class") + "\">\n");
        writer.write("<img name=\"" + imgTag.getAttribute("name") + "\" id=\"reportRowDrill" + rowCount + "\" />\n");
    }

    public static void main(String[] args)
    {
        try {
            FileInputStream fis = new FileInputStream("C:\\Documents and Settings\\Administrator\\Desktop\\Purchase Transactions.htm");
            FileOutputStream fos = new FileOutputStream("C:\\Documents and Settings\\Administrator\\Desktop\\Output.htm");
            ReportOutputFilter.filterReportStream(fis, fos);
            fis.close();
            fos.close();
        }
        catch (IOException ioe) {
            System.err.println("IOException.");
        }
    }
}
-----
Really? Nobody has any idea why the Lexer appears to increase heap usage in a linear fashion as it traverses longer and longer files? I'm not hanging onto any references. I haven't created a Parser, so there's no parse tree in use. What's going on?
The InputStreamSource object held by the lexer contains a large array of all characters read so far (mBuffer). This is probably what you are seeing.
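If you want to confirm that this is where the memory goes, a quick probe along these lines should show used heap climbing roughly in step with how much of the file has been consumed. This is only a minimal sketch, not part of the program above: the class name LexerHeapProbe and the 10,000-node sampling interval are arbitrary, and it reuses just the Page/Lexer calls already shown plus java.lang.Runtime.
-----
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.ParserException;
import java.io.FileInputStream;
import java.io.IOException;

public class LexerHeapProbe {
    public static void main(String[] args) throws IOException, ParserException {
        FileInputStream fis = new FileInputStream(args[0]);
        Lexer lexer = new Lexer(new Page(fis, "UTF-8"));
        Runtime rt = Runtime.getRuntime();
        long count = 0;
        // Lex the whole document without keeping any node references,
        // sampling used heap every 10,000 nodes.
        while (null != lexer.nextNode()) {
            if (++count % 10000 == 0) {
                long usedBytes = rt.totalMemory() - rt.freeMemory();
                System.out.println(count + " nodes, ~" + (usedBytes / (1024 * 1024)) + " MB used");
            }
        }
        fis.close();
    }
}
-----
If the numbers do climb with the document, the practical options would seem to be giving the JVM a larger heap for big reports (e.g. -Xmx256m), or splitting the input into smaller pieces and lexing each piece with a fresh Page/Lexer so the previous buffer can be garbage collected, assuming nothing else holds a reference to the old Page.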