[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners BodyScanner.java,1.20,1.21 BulletScanner.java,1.25,1.26 ...
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv15003/scanners Modified Files: BodyScanner.java BulletScanner.java CompositeTagScanner.java FormScanner.java HeadScanner.java LabelScanner.java LinkScanner.java OptionTagScanner.java SelectTagScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Replaced isAllowSelfChildren() using tagEnders set. Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** BodyScanner.java 26 Oct 2003 19:46:19 -0000 1.20 --- BodyScanner.java 28 Oct 2003 10:31:02 -0000 1.21 *************** *** 43,47 **** { private static final String MATCH_NAME [] = {"BODY"}; - private static final String ENDERS [] = {}; private static final String END_TAG_ENDERS [] = {"HTML"}; --- 43,46 ---- *************** *** 53,57 **** public BodyScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } --- 52,56 ---- public BodyScanner(String filter) { ! super(filter,MATCH_NAME,MATCH_NAME,END_TAG_ENDERS); } Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** BulletScanner.java 28 Oct 2003 03:04:18 -0000 1.25 --- BulletScanner.java 28 Oct 2003 10:31:02 -0000 1.26 *************** *** 54,58 **** public BulletScanner(String filter) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); } --- 54,58 ---- public BulletScanner(String filter) { ! 
super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS); } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.74 retrieving revision 1.75 diff -C2 -d -r1.74 -r1.75 *** CompositeTagScanner.java 28 Oct 2003 03:04:18 -0000 1.74 --- CompositeTagScanner.java 28 Oct 2003 10:31:02 -0000 1.75 *************** *** 48,52 **** * <li>Tags which will trigger a match</li> * <li>Tags which when encountered before a legal end tag, should force a correction</li> - * <li>Preventing more tags of its own type to appear as children * </ul> * Here are examples of each:<BR> --- 48,51 ---- *************** *** 79,90 **** * This is useful when you know that a certain tag can never hold children of its own type. * e.g. <FORM> can never have more form tags within it. If it does, it is an error and should ! * be corrected. The default behavior is to allow nesting. * <pre> * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "FORM" }; - * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; * MyScanner() { ! * super(MATCH_IDS, ENDERS,END_TAG_ENDERS, false); * } * ... --- 78,88 ---- * This is useful when you know that a certain tag can never hold children of its own type. * e.g. <FORM> can never have more form tags within it. If it does, it is an error and should ! * be corrected. Specify the tagEnders set to contain (at least) the match ids. * <pre> * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "FORM" }; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; * MyScanner() { ! * super(MATCH_IDS, MATCH_IDS, END_TAG_ENDERS, false); * } * ... 
*************** *** 96,100 **** { protected String [] nameOfTagToMatch; - private boolean allowSelfChildren; protected Set tagEnderSet; private Set endTagEnderSet; --- 94,97 ---- *************** *** 111,127 **** } - public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) - { - this("",nameOfTagToMatch,tagEnders,allowSelfChildren); - } - public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {},true); ! } ! ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) ! { ! this(filter,nameOfTagToMatch,tagEnders,true); } --- 108,114 ---- } public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {}); } *************** *** 129,136 **** String filter, String [] nameOfTagToMatch, ! String [] tagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); } --- 116,122 ---- String filter, String [] nameOfTagToMatch, ! String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}); } *************** *** 139,146 **** String [] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,endTagEnders, allowSelfChildren, false); } --- 125,131 ---- String [] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders) { ! 
this(filter,nameOfTagToMatch,tagEnders,endTagEnders, false); } *************** *** 171,180 **** String [] tagEnders, String [] endTagEnders, - boolean allowSelfChildren, boolean balance_quotes) { super(filter); this.nameOfTagToMatch = nameOfTagToMatch; - this.allowSelfChildren = allowSelfChildren; this.balance_quotes = balance_quotes; this.tagEnderSet = new HashSet(); --- 156,163 ---- *************** *** 193,200 **** * If it's not an empty XML tag, the lexer is repeatedly asked for * subsequent nodes until an end tag is found or a node is encountered ! * that matches the tag ender set or end tag ender set, or a node of ! * the same type is found and {@link #isAllowSelfChildren} returns ! * <code>false</code>. In all but the first case, a virtual end tag ! * is created. Each node found that is not the end tag is added to * the list of children.<p> * The scanner's {@link #createTag} method is called with details about --- 176,182 ---- * If it's not an empty XML tag, the lexer is repeatedly asked for * subsequent nodes until an end tag is found or a node is encountered ! * that matches the tag ender set or end tag ender set. ! * In the latter case, a virtual end tag is created. ! * Each node found that is not the end tag is added to * the list of children.<p> * The scanner's {@link #createTag} method is called with details about *************** *** 213,216 **** --- 195,199 ---- NodeList nodeList; Tag endTag; + String match; String name; TagScanner scanner; *************** *** 220,223 **** --- 203,207 ---- nodeList = new NodeList (); endTag = null; + match = tag.getTagName (); if (tag.isEmptyXmlTag ()) *************** *** 234,248 **** name = next.getTagName (); // check for normal end tag ! if (next.isEndTag () && name.equals (tag.getTagName ())) { endTag = next; node = null; } ! else if (isTagToBeEndedFor (next) || // check DTD ! ( // check for child of same name not allowed ! !(next.isEndTag ()) && ! !isAllowSelfChildren () && ! name.equals (tag.getTagName ()) ! 
)) { // insert a virtual end tag and backup one node --- 218,227 ---- name = next.getTagName (); // check for normal end tag ! if (next.isEndTag () && name.equals (match)) { endTag = next; node = null; } ! else if (isTagToBeEndedFor (next)) // check DTD { // insert a virtual end tag and backup one node *************** *** 338,341 **** --- 317,321 ---- ret = false; + name = tag.getTagName (); if (tag.isEndTag ()) *************** *** 345,353 **** return (ret); - } - - public final boolean isAllowSelfChildren() - { - return allowSelfChildren; } } --- 325,328 ---- Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** FormScanner.java 28 Oct 2003 03:04:18 -0000 1.49 --- FormScanner.java 28 Oct 2003 10:31:02 -0000 1.50 *************** *** 47,51 **** public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY"}; /** --- 47,51 ---- public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"FORM","HTML","BODY"}; /** *************** *** 64,68 **** public FormScanner(String filter, Parser parser) { ! super(filter,MATCH_ID,formTagEnders,false); parser.addScanner(new InputTagScanner("-i")); parser.addScanner(new TextareaTagScanner("-t")); --- 64,68 ---- public FormScanner(String filter, Parser parser) { ! 
super(filter,MATCH_ID,formTagEnders); parser.addScanner(new InputTagScanner("-i")); parser.addScanner(new TextareaTagScanner("-t")); Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** HeadScanner.java 26 Oct 2003 19:46:19 -0000 1.17 --- HeadScanner.java 28 Oct 2003 10:31:02 -0000 1.18 *************** *** 45,49 **** { private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"BODY"}; private static final String END_TAG_ENDERS [] = {"HTML"}; --- 45,49 ---- { private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"HEAD","BODY"}; private static final String END_TAG_ENDERS [] = {"HTML"}; *************** *** 55,59 **** public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } --- 55,59 ---- public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS); } Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** LabelScanner.java 26 Oct 2003 19:46:20 -0000 1.35 --- LabelScanner.java 28 Oct 2003 10:31:02 -0000 1.36 *************** *** 43,51 **** public LabelScanner() { ! super(MATCH_NAME,new String [] {},false); } public LabelScanner(String filter) { ! super(filter,MATCH_NAME,new String [] {},false); } --- 43,51 ---- public LabelScanner() { ! super(MATCH_NAME,MATCH_NAME); } public LabelScanner(String filter) { ! 
super(filter,MATCH_NAME,MATCH_NAME); } Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.57 retrieving revision 1.58 diff -C2 -d -r1.57 -r1.58 *** LinkScanner.java 27 Oct 2003 02:18:04 -0000 1.57 --- LinkScanner.java 28 Oct 2003 10:31:02 -0000 1.58 *************** *** 54,58 **** public static final String LINK_SCANNER_ID = "A"; public LinkProcessor processor; ! private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; --- 54,58 ---- public static final String LINK_SCANNER_ID = "A"; public LinkProcessor processor; ! private final static String ENDERS [] = { "A","TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; *************** *** 68,72 **** */ public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); processor = new LinkProcessor(); } --- 68,72 ---- */ public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS); processor = new LinkProcessor(); } Index: OptionTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/OptionTagScanner.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** OptionTagScanner.java 28 Oct 2003 03:04:18 -0000 1.36 --- OptionTagScanner.java 28 Oct 2003 10:31:02 -0000 1.37 *************** *** 44,48 **** public OptionTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); } --- 44,48 ---- public OptionTagScanner(String filter) { ! 
super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } Index: SelectTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/SelectTagScanner.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** SelectTagScanner.java 28 Oct 2003 03:04:18 -0000 1.34 --- SelectTagScanner.java 28 Oct 2003 10:31:02 -0000 1.35 *************** *** 48,52 **** public SelectTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); } --- 48,52 ---- public SelectTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } Index: TableColumnScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableColumnScanner.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** TableColumnScanner.java 26 Oct 2003 19:46:21 -0000 1.37 --- TableColumnScanner.java 28 Oct 2003 10:31:02 -0000 1.38 *************** *** 44,48 **** public TableColumnScanner(String filter) { ! this(filter, MATCH_STRING, new String[] {}, new String[] {}, false); } --- 44,48 ---- public TableColumnScanner(String filter) { ! this(filter, MATCH_STRING, MATCH_STRING, new String[] {}); } *************** *** 51,63 **** String[] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { super( filter, nameOfTagToMatch, tagEnders, ! endTagEnders, ! allowSelfChildren ! ); } public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException --- 51,60 ---- String[] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders) { super( filter, nameOfTagToMatch, tagEnders, ! 
endTagEnders); } public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException Index: TableRowScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableRowScanner.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** TableRowScanner.java 26 Oct 2003 19:46:21 -0000 1.40 --- TableRowScanner.java 28 Oct 2003 10:31:02 -0000 1.41 *************** *** 46,50 **** public TableRowScanner(String filter,Parser parser) { ! this(filter, parser, MATCH_STRING, new String[] {}, new String[] {}, false); } --- 46,50 ---- public TableRowScanner(String filter,Parser parser) { ! this(filter, parser, MATCH_STRING, MATCH_STRING, new String[] {}); } *************** *** 54,66 **** String[] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { super( filter, nameOfTagToMatch, tagEnders, ! endTagEnders, ! allowSelfChildren ! ); parser.addScanner(new TableColumnScanner()); } --- 54,63 ---- String[] nameOfTagToMatch, String [] tagEnders, ! String [] endTagEnders) { super( filter, nameOfTagToMatch, tagEnders, ! endTagEnders); parser.addScanner(new TableColumnScanner()); } Index: TableScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableScanner.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** TableScanner.java 26 Oct 2003 19:46:21 -0000 1.39 --- TableScanner.java 28 Oct 2003 10:31:02 -0000 1.40 *************** *** 48,54 **** public TableScanner(Parser parser,String filter) { ! super(filter, MATCH_STRING, ENDERS, ENDTAG_ENDERS, true); parser.addScanner(new TableRowScanner(parser)); - } --- 48,53 ---- public TableScanner(Parser parser,String filter) { ! 
super(filter, MATCH_STRING, ENDERS, ENDTAG_ENDERS); parser.addScanner(new TableRowScanner(parser)); } Index: TextareaTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TextareaTagScanner.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** TextareaTagScanner.java 28 Oct 2003 03:04:18 -0000 1.31 --- TextareaTagScanner.java 28 Oct 2003 10:31:02 -0000 1.32 *************** *** 45,49 **** public TextareaTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS, false); } --- 45,49 ---- public TextareaTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } Index: TitleScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TitleScanner.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** TitleScanner.java 27 Oct 2003 02:18:04 -0000 1.33 --- TitleScanner.java 28 Oct 2003 10:31:02 -0000 1.34 *************** *** 42,50 **** public class TitleScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = {"TITLE"}; ! private static final String ENDERS [] = {"BODY"}; private static final String END_TAG_ENDERS [] = {"HEAD", "HTML"}; public TitleScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } --- 42,50 ---- public class TitleScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = {"TITLE"}; ! private static final String ENDERS [] = {"TITLE","BODY"}; private static final String END_TAG_ENDERS [] = {"HEAD", "HTML"}; public TitleScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS); } |