[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners AppletScanner.java,1.32,1.33 BaseHrefScanner
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv26197/scanners Modified Files: AppletScanner.java BaseHrefScanner.java BodyScanner.java BulletListScanner.java BulletScanner.java CompositeTagScanner.java DivScanner.java DoctypeScanner.java FormScanner.java FrameScanner.java FrameSetScanner.java HeadScanner.java HtmlScanner.java ImageScanner.java InputTagScanner.java JspScanner.java LabelScanner.java LinkScanner.java MetaTagScanner.java OptionTagScanner.java ScriptScanner.java SelectTagScanner.java SpanScanner.java StyleScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TagScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Removed lexer level AbstractNode. Removed data package from parser level tags. Separated tag creation from recursion in NodeFactory interface. Index: AppletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/AppletScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** AppletScanner.java 22 Sep 2003 02:40:00 -0000 1.32 --- AppletScanner.java 20 Oct 2003 01:28:02 -0000 1.33 *************** *** 29,42 **** package org.htmlparser.scanners; import org.htmlparser.tags.AppletTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; /** ! * Scanner for Applet tags */ ! public class AppletScanner extends CompositeTagScanner { private static String [] MATCH_STRING = {"APPLET"}; --- 29,45 ---- package org.htmlparser.scanners; + + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.AppletTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; /** ! * Scanner for applet tags. */ ! 
public class AppletScanner extends CompositeTagScanner ! { private static String [] MATCH_STRING = {"APPLET"}; *************** *** 53,61 **** } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new AppletTag(tagData,compositeTagData); ! } } --- 56,73 ---- } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! AppletTag ret; ! ret = new AppletTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); + return (ret); + } } Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** BaseHrefScanner.java 22 Sep 2003 02:40:00 -0000 1.26 --- BaseHrefScanner.java 20 Oct 2003 01:28:03 -0000 1.27 *************** *** 29,51 **** package org.htmlparser.scanners; import org.htmlparser.tags.BaseHrefTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; ! public class BaseHrefScanner extends TagScanner { private LinkProcessor processor; ! public BaseHrefScanner() { super(); } ! public BaseHrefScanner(String filter,LinkProcessor processor) { super(filter); this.processor = processor; } ! public String [] getID() { String [] ids = new String[1]; ids[0] = "BASE"; --- 29,61 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.BaseHrefTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; ! /** ! * Scanner for base tags. ! 
* Even though BASE is not a composite tag, this scanner is present to ! * handle setting the base href which is referenced by other tags. ! */ ! public class BaseHrefScanner extends TagScanner ! { private LinkProcessor processor; ! public BaseHrefScanner() ! { super(); } ! public BaseHrefScanner(String filter,LinkProcessor processor) ! { super(filter); this.processor = processor; } ! public String [] getID() ! { String [] ids = new String[1]; ids[0] = "BASE"; *************** *** 53,65 **** } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String baseUrl = (String)tag.getAttribute("HREF"); ! String absoluteBaseUrl=""; ! if (baseUrl != null && baseUrl.length()>0) { ! absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); ! processor.setBaseUrl(absoluteBaseUrl); ! } ! return new BaseHrefTag(tagData,absoluteBaseUrl); } } --- 63,82 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException ! { ! BaseHrefTag ret; ! ! ret = new BaseHrefTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ! // special step here ! // Need to set the base url for the current link processor, ! // which can't be done in the tag because it doesn't have it. ! processor.setBaseUrl (ret.getBaseUrl ()); ! ! return (ret); } } Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** BodyScanner.java 22 Sep 2003 02:40:00 -0000 1.18 --- BodyScanner.java 20 Oct 2003 01:28:03 -0000 1.19 *************** *** 30,63 **** package org.htmlparser.scanners; import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! 
import org.htmlparser.tags.data.TagData; /** ! * Scans body tags. */ ! public class BodyScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = {"BODY"}; private static final String ENDERS [] = {}; private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() { this(""); } ! public BodyScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } ! public String [] getID() { return MATCH_NAME; } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new BodyTag(tagData,compositeTagData); ! } } --- 30,78 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; /** ! * Scanner for body tags. */ ! public class BodyScanner extends CompositeTagScanner ! { private static final String MATCH_NAME [] = {"BODY"}; private static final String ENDERS [] = {}; private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() ! { this(""); } ! public BodyScanner(String filter) ! { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } ! public String [] getID() ! { return MATCH_NAME; } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! 
BodyTag ret; + ret = new BodyTag (); + ret.setPage (page); + ret.setStartPosition (start); + ret.setEndPosition (end); + ret.setAttributesEx (attributes); + ret.setStartTag (startTag); + ret.setEndTag (endTag); + ret.setChildren (children); + + return (ret); + } } Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** BulletListScanner.java 22 Sep 2003 02:40:00 -0000 1.17 --- BulletListScanner.java 20 Oct 2003 01:28:03 -0000 1.18 *************** *** 30,66 **** import java.util.Stack; import org.htmlparser.Parser; import org.htmlparser.tags.BulletList; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; ! ! public class BulletListScanner extends CompositeTagScanner { private static final String [] MATCH_STRING = { "UL", "OL" }; private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); ! public BulletListScanner(Parser parser) { this("",parser); } ! public BulletListScanner(String filter, Parser parser) { super(filter, MATCH_STRING, ENDERS); parser.addScanner(new BulletScanner("-bullet",ulli)); } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new BulletList(tagData,compositeTagData); } ! public String[] getID() { return MATCH_STRING; } ! public void beforeScanningStarts() { ulli.push(this); } --- 30,86 ---- import java.util.Stack; + import java.util.Vector; import org.htmlparser.Parser; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.BulletList; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ! /** ! * Scanner for bullet lists. ! 
* Handles both UL and OL tags. ! */ ! public class BulletListScanner extends CompositeTagScanner ! { private static final String [] MATCH_STRING = { "UL", "OL" }; private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); ! public BulletListScanner(Parser parser) ! { this("",parser); } ! public BulletListScanner(String filter, Parser parser) ! { super(filter, MATCH_STRING, ENDERS); parser.addScanner(new BulletScanner("-bullet",ulli)); } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! BulletList ret; ! ! ret = new BulletList (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! return (ret); } ! public String[] getID() ! { return MATCH_STRING; } ! public void beforeScanningStarts() ! { ulli.push(this); } Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** BulletScanner.java 22 Sep 2003 02:40:00 -0000 1.22 --- BulletScanner.java 20 Oct 2003 01:28:03 -0000 1.23 *************** *** 30,38 **** import java.util.Stack; import org.htmlparser.tags.Bullet; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; --- 30,39 ---- import java.util.Stack; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.Bullet; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; *************** *** 46,50 **** * These rules are implemented easily through the shared stack. */ ! 
public class BulletScanner extends CompositeTagScanner { private static final String [] MATCH_STRING = {"LI"}; private final static String ENDERS [] = { "BODY", "HTML" }; --- 47,52 ---- * These rules are implemented easily through the shared stack. */ ! public class BulletScanner extends CompositeTagScanner ! { private static final String [] MATCH_STRING = {"LI"}; private final static String ENDERS [] = { "BODY", "HTML" }; *************** *** 52,70 **** private Stack ulli; ! public BulletScanner(Stack ulli) { this("",ulli); } ! public BulletScanner(String filter, Stack ulli) { super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); this.ulli = ulli; } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new Bullet(tagData,compositeTagData); } ! public String[] getID() { return MATCH_STRING; } --- 54,86 ---- private Stack ulli; ! public BulletScanner(Stack ulli) ! { this("",ulli); } ! public BulletScanner(String filter, Stack ulli) ! { super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); this.ulli = ulli; } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! Bullet ret; ! ! ret = new Bullet (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! return (ret); } ! public String[] getID() ! { return MATCH_STRING; } *************** *** 73,87 **** * This is the logic that decides when a bullet tag can be allowed */ ! public boolean shouldCreateEndTagAndExit() { ! if (ulli.size()==0) return false; CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) { ulli.pop(); return true; ! } else return false; } ! 
public void beforeScanningStarts() { ulli.push(this); } --- 89,108 ---- * This is the logic that decides when a bullet tag can be allowed */ ! public boolean shouldCreateEndTagAndExit() ! { ! if (ulli.size()==0) ! return false; CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) ! { ulli.pop(); return true; ! } ! else return false; } ! public void beforeScanningStarts() ! { ulli.push(this); } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.69 retrieving revision 1.70 diff -C2 -d -r1.69 -r1.70 *** CompositeTagScanner.java 5 Oct 2003 13:49:52 -0000 1.69 --- CompositeTagScanner.java 20 Oct 2003 01:28:03 -0000 1.70 *************** *** 31,45 **** import java.util.HashSet; import java.util.Set; import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; import org.htmlparser.parserHelper.CompositeTagScannerHelper; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; /** ! * To create your own scanner that can hold children, create a subclass of this class. * The composite tag scanner can be configured with:<br> * <ul> --- 31,46 ---- import java.util.HashSet; import java.util.Set; + import java.util.Vector; import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; + import org.htmlparser.lexer.Page; import org.htmlparser.parserHelper.CompositeTagScannerHelper; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; /** ! * To create your own scanner that can create tags that hold children, create a subclass of this class. 
* The composite tag scanner can be configured with:<br> * <ul> *************** *** 91,95 **** * Inside the scanner, use createTag() to specify what tag needs to be created. */ ! public abstract class CompositeTagScanner extends TagScanner { protected String [] nameOfTagToMatch; private boolean allowSelfChildren; --- 92,97 ---- * Inside the scanner, use createTag() to specify what tag needs to be created. */ ! public abstract class CompositeTagScanner extends TagScanner ! { protected String [] nameOfTagToMatch; private boolean allowSelfChildren; *************** *** 98,118 **** private boolean balance_quotes; ! public CompositeTagScanner(String [] nameOfTagToMatch) { this(nameOfTagToMatch,new String[] {}); } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { this("",nameOfTagToMatch,tagEnders); } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) { this("",nameOfTagToMatch,tagEnders,allowSelfChildren); } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { this(filter,nameOfTagToMatch,new String [] {},true); } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) { this(filter,nameOfTagToMatch,tagEnders,true); } --- 100,125 ---- private boolean balance_quotes; ! public CompositeTagScanner(String [] nameOfTagToMatch) ! { this(nameOfTagToMatch,new String[] {}); } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) ! { this("",nameOfTagToMatch,tagEnders); } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) ! { this("",nameOfTagToMatch,tagEnders,allowSelfChildren); } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) ! { this(filter,nameOfTagToMatch,new String [] {},true); } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) ! 
{ this(filter,nameOfTagToMatch,tagEnders,true); } *************** *** 122,126 **** String [] nameOfTagToMatch, String [] tagEnders, ! boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); } --- 129,134 ---- String [] nameOfTagToMatch, String [] tagEnders, ! boolean allowSelfChildren) ! { this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); } *************** *** 163,167 **** String [] endTagEnders, boolean allowSelfChildren, ! boolean balance_quotes) { super(filter); this.nameOfTagToMatch = nameOfTagToMatch; --- 171,176 ---- String [] endTagEnders, boolean allowSelfChildren, ! boolean balance_quotes) ! { super(filter); this.nameOfTagToMatch = nameOfTagToMatch; *************** *** 176,180 **** } ! public Tag scan (Tag tag, String url, Lexer lexer) throws ParserException { CompositeTagScannerHelper helper = new CompositeTagScannerHelper(this, tag, lexer, balance_quotes); --- 185,190 ---- } ! public Tag scan (Tag tag, String url, Lexer lexer) throws ParserException ! { CompositeTagScannerHelper helper = new CompositeTagScannerHelper(this, tag, lexer, balance_quotes); *************** *** 187,191 **** * but before the scanner begins its processing. */ ! public void beforeScanningStarts() { } --- 197,202 ---- * but before the scanner begins its processing. */ ! public void beforeScanningStarts() ! { } *************** *** 194,218 **** * need to store special children seperately. Though, all children are collected anyway into a node list. */ ! public void childNodeEncountered(Node node) { } /** * For composite tags this shouldn't be used and hence throws an exception. - * @param tagData - * @param tag - * @param url - * @return Tag - * @throws ParserException */ ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! throw new IllegalStateException ("composite tags shouldn't be using this"); } /** ! 
* You must override this method to create the tag of your choice upon successful parsing. Data required ! * for construction of your tag can be found within tagData and compositeTagData */ ! public abstract Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException; public final boolean isTagToBeEndedFor(Tag tag) --- 205,236 ---- * need to store special children seperately. Though, all children are collected anyway into a node list. */ ! public void childNodeEncountered(Node node) ! { } /** * For composite tags this shouldn't be used and hence throws an exception. */ ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { ! throw new ParserException ("composite tags shouldn't be using this"); } /** ! * You must override this method to create the tag of your choice upon successful parsing. ! * This method is called after the scanner has completed the scan. ! * The first four arguments are standard tag constructor arguments. ! * The last three are for the composite tag construction. ! * @param page The page the tag is found on. ! * @param start The starting offset in the page of the tag. ! * @param end The ending offset in the page of the tag. ! * @param attributes The contents of the tag as a list of {@link Attribute} objects. ! * @param startTag The tag that begins the composite tag. ! * @param endTag The tag that ends the composite tag. Note this could be a ! * virtual tag created to satisfy the scanner (check if its starting and ! * ending positions are the same). ! * @param children The list of nodes contained within the begin/end tag pair. */ ! 
public abstract Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException; public final boolean isTagToBeEndedFor(Tag tag) Index: DivScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DivScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** DivScanner.java 22 Sep 2003 02:40:00 -0000 1.30 --- DivScanner.java 20 Oct 2003 01:28:03 -0000 1.31 *************** *** 29,55 **** package org.htmlparser.scanners; import org.htmlparser.tags.Div; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! public class DivScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"DIV"}; ! public DivScanner() { this(""); } ! public DivScanner(String filter) { super(filter, MATCH_STRING); } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Div(tagData,compositeTagData); } ! public String[] getID() { return MATCH_STRING; } --- 29,74 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.Div; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; ! /** ! * A scanner for div tags. ! */ ! public class DivScanner extends CompositeTagScanner ! { private static String MATCH_STRING [] = {"DIV"}; ! public DivScanner() ! { this(""); } ! public DivScanner(String filter) ! { super(filter, MATCH_STRING); } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! Div ret; ! ! ret = new Div (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! 
ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! return (ret); } ! public String[] getID() ! { return MATCH_STRING; } Index: DoctypeScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DoctypeScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** DoctypeScanner.java 28 Sep 2003 15:33:58 -0000 1.28 --- DoctypeScanner.java 20 Oct 2003 01:28:03 -0000 1.29 *************** *** 28,53 **** package org.htmlparser.scanners; ! ///////////////////////// ! // HTML Parser Imports // ! ///////////////////////// import org.htmlparser.tags.DoctypeTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; /** ! * The HTMLDoctypeScanner identifies Doctype tags */ ! ! public class DoctypeScanner extends TagScanner { ! public DoctypeScanner() { super(); } ! public DoctypeScanner(String filter) { super(filter); } ! public String [] getID() { String [] ids = new String[1]; ids[0] = "!DOCTYPE"; --- 28,56 ---- package org.htmlparser.scanners; ! ! import java.util.Vector; ! import org.htmlparser.lexer.Page; ! import org.htmlparser.tags.DoctypeTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.ParserException; /** ! * The DoctypeScanner identifies doctype tags. */ ! public class DoctypeScanner extends TagScanner ! { ! public DoctypeScanner() ! { super(); } ! public DoctypeScanner(String filter) ! { super(filter); } ! public String [] getID() ! { String [] ids = new String[1]; ids[0] = "!DOCTYPE"; *************** *** 55,65 **** } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tag.getText(); ! tagContents=tagContents.substring(9,tagContents.length()); ! tagData.setTagContents (tagContents, tag.getAttributesEx (), "" /*url*/, false /*xml_end_tag*/); ! 
return new DoctypeTag(tagData); ! } } --- 58,72 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException ! { ! DoctypeTag ret; ! ! ret = new DoctypeTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); + return (ret); + } } Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** FormScanner.java 22 Sep 2003 02:40:00 -0000 1.45 --- FormScanner.java 20 Oct 2003 01:28:03 -0000 1.46 *************** *** 29,50 **** package org.htmlparser.scanners; - ////////////////// - // Java Imports // - ////////////////// import java.util.Stack; import org.htmlparser.Parser; import org.htmlparser.tags.FormTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.CompositeTagData; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; /** ! * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a ! * variant of the template method. If the evaluate() method returns true, that means the ! * given string contains an image tag. Extraction is done by the scan method thereafter ! * by the user of this class. */ public class FormScanner extends CompositeTagScanner --- 29,45 ---- package org.htmlparser.scanners; import java.util.Stack; + import java.util.Vector; import org.htmlparser.Parser; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.FormTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.LinkProcessor; + import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; /** ! * Scanner for form tags. 
*/ public class FormScanner extends CompositeTagScanner *************** *** 53,67 **** public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY" ! }; private Stack stack = new Stack(); /** ! * HTMLFormScanner constructor comment. */ ! public FormScanner(Parser parser) { this("", parser); } /** ! * Overriding the constructor to accept the filter */ public FormScanner(String filter, Parser parser) --- 48,67 ---- public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY"}; ! private Stack stack = new Stack(); + /** ! * Constructs a form scanner. ! * Adds input, textarea, select and option scanners to the parser's ! * scanner list. */ ! public FormScanner(Parser parser) ! { this("", parser); } + /** ! * Overriding the constructor to accept the filter. */ public FormScanner(String filter, Parser parser) *************** *** 74,83 **** } ! /** ! * Extract the location of the image, given the string to be parsed, and the url ! * of the html page in which this tag exists. ! * @param tag The form tag with the 'ACTION' attribute. ! * @param url URL of web page being parsed. ! */ public String extractFormLocn(Tag tag,String url) throws ParserException { --- 74,83 ---- } ! /** ! * Extract the location of the form, given the tag, and the url ! * of the html page in which this tag exists. ! * @param tag The form tag with the 'ACTION' attribute. ! * @param url URL of web page being parsed. ! 
*/ public String extractFormLocn(Tag tag,String url) throws ParserException { *************** *** 108,147 **** /** - * Scan the tag and extract the information related to the <IMG> tag. The url of the - * initiating scan has to be provided in case relative links are found. The initial - * url is then prepended to it to give an absolute link. - * The NodeReader is provided in order to do a lookahead operation. We assume that - * the identification has already been performed using the evaluate() method. - * @param tag HTML Tag to be scanned for identification - * @param url The initiating url of the scan (Where the html page lies) - * @param reader The reader object responsible for reading the html page - * @param currentLine The current line (automatically provided by Tag) - */ - // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException - // { - // if (linkScannerAlreadyOpen) { - // String newLine = insertEndTagBeforeNode(tag, currentLine); - // reader.changeLine(newLine); - // return new EndTag( - // new TagData( - // tag.elementBegin(), - // tag.elementBegin()+3, - // "A", - // currentLine - // ) - // ); - // } - // return super.scan(tag,url,reader,currentLine); - // } - - - /** * @see org.htmlparser.scanners.TagScanner#getID() */ ! public String [] getID() { return MATCH_ID; } ! public boolean evaluate(String s, TagScanner previousOpenScanner) { if (previousOpenScanner instanceof LinkScanner) { linkScannerAlreadyOpen = true; --- 108,120 ---- /** * @see org.htmlparser.scanners.TagScanner#getID() */ ! public String [] getID() ! { return MATCH_ID; } ! public boolean evaluate(String s, TagScanner previousOpenScanner) ! { if (previousOpenScanner instanceof LinkScanner) { linkScannerAlreadyOpen = true; *************** *** 163,179 **** } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! 
String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) ! compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! return new FormTag(tagData, compositeTagData); } ! public void beforeScanningStarts() { stack.push(this); } - } --- 136,170 ---- } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! FormTag ret; ! ! // special step here... ! // not sure why the recursion is tracked this way, ! // rather than using the ENDERS and END_TAG_ENDERS arrays... if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! ! ret = new FormTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! // special step here... ! // ... is it true that without an ACTION the default is to send it back to the same page? ! String formUrl = extractFormLocn(startTag, page.getUrl ()); ! if (formUrl!=null && formUrl.length()>0) ! startTag.setAttribute("ACTION",formUrl); ! ! return (ret); } ! public void beforeScanningStarts() ! 
{ stack.push(this); } } Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** FrameScanner.java 22 Sep 2003 02:40:00 -0000 1.29 --- FrameScanner.java 20 Oct 2003 01:28:03 -0000 1.30 *************** *** 30,49 **** package org.htmlparser.scanners; - - ////////////////// - // Java Imports // - ////////////////// import java.util.Hashtable; import org.htmlparser.tags.FrameTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; /** ! * Scans for the Frame Tag. This is a subclass of TagScanner, and is called using a ! * variant of the template method. If the evaluate() method returns true, that means the ! * given string contains an image tag. Extraction is done by the scan method thereafter ! * by the user of this class. */ public class FrameScanner extends TagScanner --- 30,44 ---- package org.htmlparser.scanners; import java.util.Hashtable; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.FrameTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; + /** ! * Scanner for frame tags. */ public class FrameScanner extends TagScanner *************** *** 56,59 **** --- 51,55 ---- super(); } + /** * Overriding the constructor to accept the filter *************** *** 63,97 **** super(filter); } - /** - * Extract the location of the image, given the string to be parsed, and the url - * of the html page in which this tag exists. - * @param tag The tag with the 'SRC' attribute. - * @param url URL of web page being parsed. 
- */ - public String extractFrameLocn(Tag tag,String url) throws ParserException - { - try { - Hashtable table = tag.getAttributes(); - String relativeFrame = (String)table.get("SRC"); - if (relativeFrame==null) return ""; else - return (new LinkProcessor()).extract(relativeFrame,url); - } - catch (Exception e) { - String msg; - if (tag!=null) msg = tag.getText(); else msg = "null"; - throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); - } - } - - - - public String extractFrameName(Tag tag,String url) { - return tag.getAttribute("NAME"); - } /** * @see org.htmlparser.scanners.TagScanner#getID() */ ! public String [] getID() { String [] ids = new String[1]; ids[0] = "FRAME"; --- 59,68 ---- super(filter); } /** * @see org.htmlparser.scanners.TagScanner#getID() */ ! public String [] getID() ! { String [] ids = new String[1]; ids[0] = "FRAME"; *************** *** 99,108 **** } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String frameUrl = extractFrameLocn(tag,url); ! String frameName = extractFrameName(tag,url); ! ! return new FrameTag(tagData,frameUrl,frameName); } - } --- 70,84 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException ! { ! FrameTag ret; ! ! ret = new FrameTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ! 
return (ret); } } Index: FrameSetScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameSetScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** FrameSetScanner.java 22 Sep 2003 02:40:00 -0000 1.28 --- FrameSetScanner.java 20 Oct 2003 01:28:03 -0000 1.29 *************** *** 27,47 **** // Website : http://www.industriallogic.com - package org.htmlparser.scanners; - ////////////////// - // Java Imports // - ////////////////// import org.htmlparser.tags.FrameSetTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; /** ! * Scans for the Frame Tag. This is a subclass of TagScanner, and is called using a ! * variant of the template method. If the evaluate() method returns true, that means the ! * given string contains an image tag. Extraction is done by the scan method thereafter ! * by the user of this class. */ public class FrameSetScanner extends CompositeTagScanner --- 27,42 ---- // Website : http://www.industriallogic.com package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.FrameSetTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; /** ! * Scanner for frame set tags. */ public class FrameSetScanner extends CompositeTagScanner *************** *** 59,71 **** } ! public String [] getID() { return MATCH_NAME; } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new FrameSetTag(tagData,compositeTagData); ! } } --- 54,76 ---- } ! public String [] getID() ! { return MATCH_NAME; } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! FrameSetTag ret; ! ! ret = new FrameSetTag (); ! 
ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); + return (ret); + } } Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** HeadScanner.java 22 Sep 2003 02:40:00 -0000 1.15 --- HeadScanner.java 20 Oct 2003 01:28:03 -0000 1.16 *************** *** 32,61 **** package org.htmlparser.scanners; import org.htmlparser.tags.HeadTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! public class HeadScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = {"HEAD"}; private static final String ENDERS [] = {"BODY"}; private static final String END_TAG_ENDERS [] = {"HTML"}; ! public HeadScanner() { this(""); } ! public HeadScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } ! public String [] getID() { return MATCH_NAME; } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new HeadTag(tagData,compositeTagData); } } --- 32,80 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.HeadTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; ! /** ! * Scanner for head tags. ! */ ! public class HeadScanner extends CompositeTagScanner ! { private static final String MATCH_NAME [] = {"HEAD"}; private static final String ENDERS [] = {"BODY"}; private static final String END_TAG_ENDERS [] = {"HTML"}; ! public HeadScanner() ! { this(""); } ! public HeadScanner(String filter) ! 
{ super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); } ! public String [] getID() ! { return MATCH_NAME; } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! HeadTag ret; ! ! ret = new HeadTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! return (ret); } } Index: HtmlScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HtmlScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** HtmlScanner.java 22 Sep 2003 02:40:00 -0000 1.30 --- HtmlScanner.java 20 Oct 2003 01:28:03 -0000 1.31 *************** *** 29,37 **** package org.htmlparser.scanners; import org.htmlparser.tags.Html; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; public class HtmlScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"HTML"}; --- 29,42 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.Html; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; + /** + * Scanner for html tags. + */ public class HtmlScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"HTML"}; *************** *** 49,57 **** } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Html(tagData,compositeTagData); ! } } --- 54,71 ---- } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! Html ret; ! ! ret = new Html (); ! 
ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); + return (ret); + } } Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** ImageScanner.java 5 Oct 2003 13:49:52 -0000 1.30 --- ImageScanner.java 20 Oct 2003 01:28:03 -0000 1.31 *************** *** 34,42 **** import java.util.Hashtable; import java.util.Vector; import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; --- 34,42 ---- import java.util.Hashtable; import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; *************** *** 69,176 **** } - /** - * Extract the location of the image - * Given the tag (with attributes), and the url of the html page in which - * this tag exists, perform best effort to extract the 'intended' URL. - * Attempts to handle such attributes as: - * <pre> - * <IMG SRC=http://www.redgreen.com> - normal - * <IMG SRC =http://www.redgreen.com> - space between attribute name and equals sign - * <IMG SRC= http://www.redgreen.com> - space between equals sign and attribute value - * <IMG SRC = http://www.redgreen.com> - space both sides of equals sign - * </pre> - * @param tag The tag with the 'SRC' attribute. - * @param url URL of web page being parsed. 
- */ - public String extractImageLocn (Tag tag, String url) throws ParserException - { - Vector attributes; - int size; - Attribute attribute; - String string; - String data; - int state; - String name; - String ret; - - ret = ""; - state = 0; - attributes = tag.getAttributesEx (); - size = attributes.size (); - for (int i = 0; (i < size) && (state < 3); i++) - { - attribute = (Attribute)attributes.elementAt (i); - string = attribute.getName (); - data = attribute.getValue (); - switch (state) - { - case 0: // looking for 'src' - if (null != string) - { - name = string.toUpperCase (); - if (name.equals ("SRC")) - { - state = 1; - if (null != data) - { - if ("".equals (data)) - state = 2; // empty attribute, SRC= - else - { - ret = data; - i = size; // exit fast - } - } - - } - else if (name.startsWith ("SRC")) - { - // missing equals sign - ret = string.substring (3); - state = 0; // go back to searching for SRC - // because, maybe we found SRCXXX - // where XXX isn't a URL - } - } - break; - case 1: // looking for equals sign - if (null != string) - { - if (string.startsWith ("=")) - { - state = 2; - if (1 < string.length ()) - { - ret = string.substring (1); - state = 0; // keep looking ? - } - else if (null != data) - { - ret = string.substring (1); - state = 0; // keep looking ? - } - } - } - break; - case 2: // looking for a valueless attribute that could be a relative or absolute URL - if (null != string) - { - if (null == data) - ret = string; - state = 0; // only check first non-whitespace item - // not every valid attribute after an equals - } - break; - default: - throw new IllegalStateException ("we're not supposed to in state " + state); - } - } - ret = ParserUtils.removeChars (ret, '\n'); - ret = ParserUtils.removeChars (ret, '\r'); - ret = processor.extract (ret, url); - - return (ret); - } - public String [] getID() { String [] ids = new String[1]; --- 69,72 ---- *************** *** 179,187 **** } ! 
protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String link = extractImageLocn(tag,url); ! return new ImageTag(tagData, link); ! } ! } --- 75,102 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException ! { ! ImageTag ret; ! ret = new ImageTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ! // special step here... ! // Need to update the imageURL string in the image tag, ! // but not the SRC attribute which it does when you set the ImageURL ! // property. Can't do it in the tag, because the tag doesn't have the ! // current link processor object which might have a BASE href different ! // than the page. ! String src = ret.getAttribute ("SRC"); ! ret.setImageURL (processor.extract (ret.getImageURL (), page.getUrl ())); ! if (null == src) ! ret.removeAttribute ("SRC"); ! else ! ret.setAttribute ("SRC", src); ! ! return (ret); ! } ! } \ No newline at end of file Index: InputTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/InputTagScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** InputTagScanner.java 22 Sep 2003 02:40:00 -0000 1.26 --- InputTagScanner.java 20 Oct 2003 01:28:03 -0000 1.27 *************** *** 29,35 **** package org.htmlparser.scanners; import org.htmlparser.tags.InputTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; --- 29,36 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.InputTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.ParserException; *************** *** 52,59 **** } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! 
throws ParserException { ! return new InputTag(tagData); ! } } --- 53,67 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException ! { ! InputTag ret; + ret = new InputTag (); + ret.setPage (page); + ret.setStartPosition (start); + ret.setEndPosition (end); + ret.setAttributesEx (attributes); + + return (ret); + } } Index: JspScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/JspScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** JspScanner.java 28 Sep 2003 15:33:58 -0000 1.28 --- JspScanner.java 20 Oct 2003 01:28:03 -0000 1.29 *************** *** 30,33 **** --- 30,35 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; ///////////////////////// // HTML Parser Imports // *************** *** 35,39 **** import org.htmlparser.tags.JspTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.ParserException; --- 37,40 ---- *************** *** 56,66 **** } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! tagData.setTagBegin (tagData.getTagBegin () + 1); ! tagData.setTagEnd (tagData.getTagEnd () - 1); ! return new JspTag(tagData); } - } --- 57,71 ---- } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { ! JspTag ret; ! ! ret = new JspTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ! 
return (ret); } } Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** LabelScanner.java 22 Sep 2003 02:40:00 -0000 1.33 --- LabelScanner.java 20 Oct 2003 01:28:03 -0000 1.34 *************** *** 32,39 **** package org.htmlparser.scanners; import org.htmlparser.tags.LabelTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; public class LabelScanner extends CompositeTagScanner { --- 32,41 ---- package org.htmlparser.scanners; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.LabelTag; import org.htmlparser.tags.Tag; ! import org.htmlparser.util.NodeList; ! import org.htmlparser.util.ParserException; public class LabelScanner extends CompositeTagScanner { *************** *** 52,59 **** } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new LabelTag(tagData,compositeTagData); } } --- 54,71 ---- } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! LabelTag ret; ! ! ret = new LabelTag (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! ret.setStartTag (startTag); ! ret.setEndTag (endTag); ! ret.setChildren (children); ! ! 
return (ret); } } Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.54 retrieving revision 1.55 diff -C2 -d -r1.54 -r1.55 *** LinkScanner.java 5 Oct 2003 13:49:53 -0000 1.54 --- LinkScanner.java 20 Oct 2003 01:28:03 -0000 1.55 *************** *** 34,44 **** ////////////////// import java.util.Hashtable; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.Tag; - import org.htmlparser.tags.data.CompositeTagData; - import org.htmlparser.tags.data.LinkData; - import org.htmlparser.tags.data.TagData; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserUtils; --- 34,44 ---- ////////////////// import java.util.Hashtable; + import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.LinkProcessor; + import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserUtils; *************** *** 53,57 **** private static final String MATCH_NAME [] = {"A"}; public static final String LINK_SCANNER_ID = "A"; ! private LinkProcessor processor; private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; --- 53,57 ---- private static final String MATCH_NAME [] = {"A"}; public static final String LINK_SCANNER_ID = "A"; ! public LinkProcessor processor; private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; *************** *** 72,111 **** } ! public Tag creat... [truncated message content] |