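[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners BulletListScanner.java,1.5,1.6 CompositeTagScanner.java,1.55,1.56 FormScanner.java,1.32,1.33 OptionTagScanner.java,1.21,1.22 SelectTagScanner.java,1.19,1.20 TextareaTagScanner.java,1.16,1.17 TitleScanner.java,1.17,1.18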
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-05-31 17:48:26
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv11008/org/htmlparser/scanners Modified Files: BulletListScanner.java CompositeTagScanner.java FormScanner.java OptionTagScanner.java SelectTagScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Fixed bug #745566 StackOverflowError on select with too many unclosed options. Replicated the missing end tag detection of the bullet list scanner into the form scanners. This isn't ideal, but it seems to work. Added test case testUnclosedOptions in FormScannerTest that reads http://htmlparser.sourceforge.net/test/overflowpage.html. Note: this test only failed on Windows, prior to the fix. Picked up missing scanner tests in AllTests. The HtmlTest file showed that title tags weren't being scanned, which I fixed with a kludge. Added ordered list types to BulletListScanner match tags. Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** BulletListScanner.java 26 May 2003 00:26:47 -0000 1.5 --- BulletListScanner.java 31 May 2003 17:48:21 -0000 1.6 *************** *** 40,44 **** public class BulletListScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = {"UL"}; private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); --- 40,44 ---- public class BulletListScanner extends CompositeTagScanner { ! 
private static final String [] MATCH_STRING = { "UL", "OL" }; private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.55 retrieving revision 1.56 diff -C2 -d -r1.55 -r1.56 *** CompositeTagScanner.java 26 May 2003 00:26:47 -0000 1.55 --- CompositeTagScanner.java 31 May 2003 17:48:21 -0000 1.56 *************** *** 95,99 **** protected String [] nameOfTagToMatch; private boolean allowSelfChildren; ! private Set tagEnderSet; private Set endTagEnderSet; private boolean balance_quotes; --- 95,99 ---- protected String [] nameOfTagToMatch; private boolean allowSelfChildren; ! protected Set tagEnderSet; private Set endTagEnderSet; private boolean balance_quotes; Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** FormScanner.java 26 May 2003 00:26:47 -0000 1.32 --- FormScanner.java 31 May 2003 17:48:21 -0000 1.33 *************** *** 32,35 **** --- 32,37 ---- // Java Imports // ////////////////// + import java.util.Stack; + import org.htmlparser.Parser; import org.htmlparser.tags.FormTag; *************** *** 53,56 **** --- 55,59 ---- private static final String [] formTagEnders = {"HTML","BODY" }; + private Stack stack = new Stack(); /** * HTMLFormScanner constructor comment. *************** *** 66,72 **** super(filter,MATCH_ID,formTagEnders,false); parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t")); ! parser.addScanner(new SelectTagScanner("-select")); ! 
parser.addScanner(new OptionTagScanner("-option")); } --- 69,75 ---- super(filter,MATCH_ID,formTagEnders,false); parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t",stack)); ! parser.addScanner(new SelectTagScanner("-select", stack)); ! parser.addScanner(new OptionTagScanner("-option",stack)); } *************** *** 165,171 **** --- 168,179 ---- if (formUrl!=null && formUrl.length()>0) compositeTagData.getStartTag().setAttribute("ACTION",formUrl); + if (!stack.empty () && (this == stack.peek ())) + stack.pop (); return new FormTag(tagData, compositeTagData); } + public void beforeScanningStarts() { + stack.push(this); + } } Index: OptionTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/OptionTagScanner.java,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** OptionTagScanner.java 26 May 2003 00:26:47 -0000 1.21 --- OptionTagScanner.java 31 May 2003 17:48:21 -0000 1.22 *************** *** 29,32 **** --- 29,34 ---- package org.htmlparser.scanners; + import java.util.Stack; + import org.htmlparser.tags.OptionTag; import org.htmlparser.tags.Tag; *************** *** 37,52 **** { private static final String MATCH_NAME [] = {"OPTION"}; ! private static final String [] ENDERS = { }; ! private static final String [] END_TAG_ENDERS = { "SELECT", "BODY", "HTML" }; ! public OptionTagScanner() { ! super(MATCH_NAME[0], ENDERS, END_TAG_ENDERS, false); } ! public OptionTagScanner(String filter) { super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); } ! public String [] getID() { return MATCH_NAME; } --- 39,56 ---- { private static final String MATCH_NAME [] = {"OPTION"}; ! private static final String [] ENDERS = { "INPUT", "TEXTAREA", "SELECT", "OPTION" }; ! private static final String [] END_TAG_ENDERS = { "SELECT", "FORM", "BODY", "HTML" }; ! private Stack stack; ! 
public OptionTagScanner(Stack stack) { ! this("", stack); } ! public OptionTagScanner(String filter, Stack stack) { super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); + this.stack = stack; } ! public String [] getID() { return MATCH_NAME; } *************** *** 55,59 **** --- 59,96 ---- TagData tagData, CompositeTagData compositeTagData) { + if (!stack.empty () && (this == stack.peek ())) + stack.pop (); return new OptionTag(tagData,compositeTagData); + } + + public void beforeScanningStarts () + { + stack.push (this); + } + + /** + * This is the logic that decides when a option tag can be allowed + */ + public boolean shouldCreateEndTagAndExit () + { + boolean ret; + + ret = false; + + if (0 != stack.size ()) + { + TagScanner parentScanner = (TagScanner)stack.peek (); + if (parentScanner instanceof CompositeTagScanner) + { + CompositeTagScanner scanner = (CompositeTagScanner)parentScanner; + if (scanner.tagEnderSet.contains (MATCH_NAME[0])) // should loop over names + { + stack.pop (); + ret = true; + } + } + } + + return (ret); } } Index: SelectTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/SelectTagScanner.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** SelectTagScanner.java 26 May 2003 00:26:48 -0000 1.19 --- SelectTagScanner.java 31 May 2003 17:48:21 -0000 1.20 *************** *** 29,32 **** --- 29,33 ---- package org.htmlparser.scanners; + import java.util.Stack; import org.htmlparser.Node; *************** *** 42,58 **** { private static final String MATCH_NAME [] = {"SELECT"}; private NodeList optionTags; ! public SelectTagScanner() { ! super(MATCH_NAME); } ! public SelectTagScanner(String filter) { ! super(filter,MATCH_NAME); } ! ! 
public String [] getID() { return MATCH_NAME; } --- 43,63 ---- { private static final String MATCH_NAME [] = {"SELECT"}; + private static final String [] ENDERS = { "INPUT", "TEXTAREA", "SELECT" }; + private static final String [] END_TAG_ENDERS = {"FORM", "BODY", "HTML" }; private NodeList optionTags; + private Stack stack; ! public SelectTagScanner(Stack stack) { ! this("", stack); } ! public SelectTagScanner(String filter, Stack stack) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); ! this.stack = stack; } ! ! public String [] getID() { return MATCH_NAME; } *************** *** 62,65 **** --- 67,72 ---- TagData tagData, CompositeTagData compositeTagData) { + if (!stack.empty () && (this == stack.peek ())) + stack.pop (); return new SelectTag(tagData,compositeTagData,optionTags); } *************** *** 70,76 **** } ! public void beforeScanningStarts() { ! optionTags = new NodeList(); } } --- 77,110 ---- } ! public void beforeScanningStarts () ! { ! optionTags = new NodeList (); ! 
stack.push (this); } + /** + * This is the logic that decides when a option tag can be allowed + */ + public boolean shouldCreateEndTagAndExit () + { + boolean ret; + + ret = false; + + if (0 != stack.size ()) + { + TagScanner parentScanner = (TagScanner)stack.peek (); + if (parentScanner instanceof CompositeTagScanner) + { + CompositeTagScanner scanner = (CompositeTagScanner)parentScanner; + if (scanner.tagEnderSet.contains (MATCH_NAME[0])) // should loop over names + { + stack.pop (); + ret = true; + } + } + } + + return (ret); + } } Index: TextareaTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TextareaTagScanner.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** TextareaTagScanner.java 26 May 2003 00:26:48 -0000 1.16 --- TextareaTagScanner.java 31 May 2003 17:48:21 -0000 1.17 *************** *** 29,32 **** --- 29,34 ---- package org.htmlparser.scanners; + import java.util.Stack; + import org.htmlparser.tags.Tag; import org.htmlparser.tags.TextareaTag; *************** *** 37,49 **** { private static final String MATCH_NAME [] = {"TEXTAREA"}; ! public TextareaTagScanner() { ! super(MATCH_NAME); } ! public TextareaTagScanner(String filter) { ! super(filter,MATCH_NAME); } --- 39,55 ---- { private static final String MATCH_NAME [] = {"TEXTAREA"}; + private static final String [] ENDERS = { "INPUT", "TEXTAREA", "SELECT", "OPTION" }; + private static final String [] END_TAG_ENDERS = {"FORM", "BODY", "HTML" }; + private Stack stack; ! public TextareaTagScanner(Stack stack) { ! this("", stack); } ! public TextareaTagScanner(String filter, Stack stack) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); ! this.stack = stack; } *************** *** 55,60 **** TagData tagData, CompositeTagData compositeTagData) { return new TextareaTag(tagData,compositeTagData); } ! 
} \ No newline at end of file --- 61,98 ---- TagData tagData, CompositeTagData compositeTagData) { + if (!stack.empty () && (this == stack.peek ())) + stack.pop (); return new TextareaTag(tagData,compositeTagData); } ! public void beforeScanningStarts () ! { ! stack.push (this); ! } ! ! /** ! * This is the logic that decides when a option tag can be allowed ! */ ! public boolean shouldCreateEndTagAndExit () ! { ! boolean ret; ! ! ret = false; ! ! if (0 != stack.size ()) ! { ! TagScanner parentScanner = (TagScanner)stack.peek (); ! if (parentScanner instanceof CompositeTagScanner) ! { ! CompositeTagScanner scanner = (CompositeTagScanner)parentScanner; ! if (scanner.tagEnderSet.contains (MATCH_NAME[0])) // should loop over names ! { ! stack.pop (); ! ret = true; ! } ! } ! } ! ! return (ret); ! } ! } Index: TitleScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TitleScanner.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** TitleScanner.java 26 May 2003 00:26:48 -0000 1.17 --- TitleScanner.java 31 May 2003 17:48:21 -0000 1.18 *************** *** 51,55 **** public boolean evaluate(String tagNameBeingChecked, TagScanner previousOpenScanner) { absorbLeadingBlanks(tagNameBeingChecked); ! return (tagNameBeingChecked.toUpperCase ().startsWith (MATCH_NAME[0]) && null == previousOpenScanner); } --- 51,56 ---- public boolean evaluate(String tagNameBeingChecked, TagScanner previousOpenScanner) { absorbLeadingBlanks(tagNameBeingChecked); ! return (tagNameBeingChecked.toUpperCase ().startsWith (MATCH_NAME[0]) && ((null == previousOpenScanner) ! || !previousOpenScanner.getID ()[0].equals ("TITLE"))); } |