[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper TagParser.java,1.28,1.29
Brought to you by:
derrickoswald
|
From: <po...@us...> - 2003-05-22 00:36:18
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper
In directory sc8-pr-cvs1:/tmp/cvs-serv7363/src/org/htmlparser/parserHelper
Modified Files:
TagParser.java
Log Message:
- Tag now remembers (in tagLines) *all* the lines spanned by the tag (not just the last one).
- Tag now remembers line number on which tag starts (from TagData).
- Tag now has new public methods:
    int getTagStartLine()
    int getTagEndLine()
    String[] getTagLines()
- TagParser now gives Tag (via constructor and setTagLine) sufficient info to support the above.
Index: TagParser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/TagParser.java,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -d -r1.28 -r1.29
*** TagParser.java 19 May 2003 02:49:57 -0000 1.28
--- TagParser.java 22 May 2003 00:36:15 -0000 1.29
***************
*** 39,44 ****
public class TagParser {
public final static int TAG_BEFORE_PARSING_STATE=1;
! public final static int TAG_BEGIN_PARSING_STATE=1<<2;
! public final static int TAG_FINISHED_PARSING_STATE=1<<3;
public final static int TAG_ILLEGAL_STATE=1<<4;
public final static int TAG_IGNORE_DATA_STATE=1<<5;
--- 39,44 ----
public class TagParser {
public final static int TAG_BEFORE_PARSING_STATE=1;
! public final static int TAG_BEGIN_PARSING_STATE=1<<2;
! public final static int TAG_FINISHED_PARSING_STATE=1<<3;
public final static int TAG_ILLEGAL_STATE=1<<4;
public final static int TAG_IGNORE_DATA_STATE=1<<5;
***************
*** 58,63 ****
int i=position;
char ch;
! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE
! Tag tag = new Tag(new TagData(0,0,"",input));
Bool encounteredQuery = new Bool(false);
while (i<tag.getTagLine().length() &&
--- 58,64 ----
int i=position;
char ch;
! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE
! Tag tag = new Tag(new TagData(position, 0, reader.getLastLineNumber(), 0, "", input, "", false));
!
Bool encounteredQuery = new Bool(false);
while (i<tag.getTagLine().length() &&
***************
*** 79,83 ****
return tag;
} else
! return null;
}
--- 80,84 ----
return tag;
} else
! return null;
}
***************
*** 87,92 ****
state = toggleIgnoringState(state, ch, ignorechar);
if (state==TAG_BEFORE_PARSING_STATE && ch!='<') {
! state= TAG_ILLEGAL_STATE;
! }
if (state==TAG_IGNORE_DATA_STATE && ch=='<') {
// If the next tag char is is close tag, then
--- 88,93 ----
state = toggleIgnoringState(state, ch, ignorechar);
if (state==TAG_BEFORE_PARSING_STATE && ch!='<') {
! state= TAG_ILLEGAL_STATE;
! }
if (state==TAG_IGNORE_DATA_STATE && ch=='<') {
// If the next tag char is is close tag, then
***************
*** 96,100 ****
}
if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') {
! state = TAG_IGNORE_DATA_STATE;
}
checkIfAppendable(encounteredQuery, state, ch, tag);
--- 97,101 ----
}
if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') {
! state = TAG_IGNORE_DATA_STATE;
}
checkIfAppendable(encounteredQuery, state, ch, tag);
***************
*** 277,282 ****
// Annette Doyle - see testcase HTMLImageScannerTest.testImageTagOnMultipleLines()
// Further modified by Somik Raha, to remove bug - HTMLTagTest.testBrokenTag
do {
! nextLine = reader.getNextLine();
}
while (nextLine!=null && nextLine.length()==0);
--- 278,285 ----
// Annette Doyle - see testcase HTMLImageScannerTest.testImageTagOnMultipleLines()
// Further modified by Somik Raha, to remove bug - HTMLTagTest.testBrokenTag
+ int numLinesAdvanced = 0;
do {
! nextLine = reader.getNextLine();
! numLinesAdvanced++;
}
while (nextLine!=null && nextLine.length()==0);
***************
*** 288,291 ****
--- 291,298 ----
tag.append(Node.getLineSeparator());
}
+
+ // Ensure blank lines are included in tag's 'tagLines'
+ while (--numLinesAdvanced > 0)
+ tag.setTagLine("");
// We need to continue parsing to the next line
|