Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12246/htmlparser/src/org/htmlparser/lexer
Modified Files:
Lexer.java Page.java Stream.java
Log Message:
Optimizations suggested by profiling.
Correction to previous drop:
-- use extractAllNodesThatMatch to replace searchFor(cls) --
Index: Page.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
retrieving revision 1.50
retrieving revision 1.51
diff -C2 -d -r1.50 -r1.51
*** Page.java 15 May 2005 11:49:04 -0000 1.50
--- Page.java 20 Jun 2005 01:56:32 -0000 1.51
***************
*** 687,699 ****
{
int i;
char ret;
i = cursor.getPosition ();
! if (mSource.offset () < i)
! // hmmm, we could skip ahead, but then what about the EOL index
! throw new ParserException (
! "attempt to read future characters from source "
! + i + " > " + mSource.offset ());
! else if (mSource.offset () == i)
try
{
--- 687,696 ----
{
int i;
+ int offset;
char ret;
i = cursor.getPosition ();
! offset = mSource.offset ();
! if (offset == i)
try
{
***************
*** 713,717 ****
+ cursor.getPosition (), ioe);
}
! else
{
// historic read
--- 710,714 ----
+ cursor.getPosition (), ioe);
}
! else if (offset > i)
{
// historic read
***************
*** 728,731 ****
--- 725,733 ----
cursor.advance ();
}
+ else
+ // hmmm, we could skip ahead, but then what about the EOL index
+ throw new ParserException (
+ "attempt to read future characters from source "
+ + i + " > " + mSource.offset ());
// handle \r
Index: Stream.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** Stream.java 15 May 2005 11:49:04 -0000 1.14
--- Stream.java 20 Jun 2005 01:56:32 -0000 1.15
***************
*** 264,270 ****
// the array pointed to can only be bigger than the previous buffer,
// and hence no array bounds exception can be raised.
! if (0 == available ())
fill (false);
! if (0 != available ())
ret = mBuffer[mOffset++] & 0xff;
else
--- 264,270 ----
// the array pointed to can only be bigger than the previous buffer,
// and hence no array bounds exception can be raised.
! if (0 == (mLevel - mOffset)) // (0 == available ())
fill (false);
! if (0 != (mLevel - mOffset)) // (0 != available ())
ret = mBuffer[mOffset++] & 0xff;
else
Index: Lexer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.39
retrieving revision 1.40
diff -C2 -d -r1.39 -r1.40
*** Lexer.java 15 May 2005 11:49:04 -0000 1.39
--- Lexer.java 20 Jun 2005 01:56:32 -0000 1.40
***************
*** 1371,1375 ****
ConnectionManager manager = Page.getConnectionManager ();
lexer = new Lexer (manager.openConnection (args[0]));
! while (null != (node = lexer.nextNode ()))
System.out.println (node.toString ());
}
--- 1371,1375 ----
ConnectionManager manager = Page.getConnectionManager ();
lexer = new Lexer (manager.openConnection (args[0]));
! while (null != (node = lexer.nextNode (false)))
System.out.println (node.toString ());
}
|