[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners AppletScanner.java,1.28,1.29 BaseHrefScanner
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv31228/scanners Modified Files: AppletScanner.java BaseHrefScanner.java BodyScanner.java BulletListScanner.java BulletScanner.java CompositeTagScanner.java DivScanner.java DoctypeScanner.java FormScanner.java FrameScanner.java FrameSetScanner.java HeadScanner.java HtmlScanner.java ImageScanner.java InputTagScanner.java JspScanner.java LabelScanner.java LinkScanner.java MetaTagScanner.java OptionTagScanner.java ScriptScanner.java SelectTagScanner.java SpanScanner.java StyleScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TagScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Change tabs to spaces in all source files. Index: AppletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/AppletScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** AppletScanner.java 24 Aug 2003 21:59:42 -0000 1.28 --- AppletScanner.java 3 Sep 2003 23:36:19 -0000 1.29 *************** *** 39,61 **** */ public class AppletScanner extends CompositeTagScanner { ! private static String [] MATCH_STRING = {"APPLET"}; ! ! public AppletScanner() { ! super(MATCH_STRING); ! } ! ! public AppletScanner(String filter) { ! super(filter,MATCH_STRING); ! } ! public String [] getID() { ! return MATCH_STRING; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! ! return new AppletTag(tagData,compositeTagData); ! } } --- 39,61 ---- */ public class AppletScanner extends CompositeTagScanner { ! private static String [] MATCH_STRING = {"APPLET"}; ! ! public AppletScanner() { ! super(MATCH_STRING); ! } ! ! public AppletScanner(String filter) { ! super(filter,MATCH_STRING); ! } ! public String [] getID() { ! return MATCH_STRING; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! ! return new AppletTag(tagData,compositeTagData); ! } } Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** BaseHrefScanner.java 24 Aug 2003 21:59:42 -0000 1.22 --- BaseHrefScanner.java 3 Sep 2003 23:36:19 -0000 1.23 *************** *** 36,65 **** public class BaseHrefScanner extends TagScanner { ! private LinkProcessor processor; ! public BaseHrefScanner() { ! super(); ! } ! public BaseHrefScanner(String filter,LinkProcessor processor) { ! super(filter); ! this.processor = processor; ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "BASE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String baseUrl = (String)tag.getAttribute("HREF"); ! String absoluteBaseUrl=""; ! if (baseUrl != null && baseUrl.length()>0) { ! absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); ! processor.setBaseUrl(absoluteBaseUrl); ! } ! return new BaseHrefTag(tagData,absoluteBaseUrl); ! } } --- 36,65 ---- public class BaseHrefScanner extends TagScanner { ! private LinkProcessor processor; ! public BaseHrefScanner() { ! super(); ! } ! public BaseHrefScanner(String filter,LinkProcessor processor) { ! super(filter); ! this.processor = processor; ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "BASE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String baseUrl = (String)tag.getAttribute("HREF"); ! String absoluteBaseUrl=""; ! if (baseUrl != null && baseUrl.length()>0) { ! absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); ! processor.setBaseUrl(absoluteBaseUrl); ! } ! return new BaseHrefTag(tagData,absoluteBaseUrl); ! } } Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** BodyScanner.java 24 Aug 2003 21:59:42 -0000 1.14 --- BodyScanner.java 3 Sep 2003 23:36:19 -0000 1.15 *************** *** 39,63 **** */ public class BodyScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"BODY"}; ! private static final String ENDERS [] = {}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() { ! this(""); ! } ! ! public BodyScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new BodyTag(tagData,compositeTagData); ! } } --- 39,63 ---- */ public class BodyScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"BODY"}; ! private static final String ENDERS [] = {}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() { ! this(""); ! } ! ! public BodyScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new BodyTag(tagData,compositeTagData); ! } } Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BulletListScanner.java 24 Aug 2003 21:59:42 -0000 1.13 --- BulletListScanner.java 3 Sep 2003 23:36:19 -0000 1.14 *************** *** 40,68 **** public class BulletListScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = { "UL", "OL" }; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private Stack ulli = new Stack(); ! ! public BulletListScanner(Parser parser) { ! this("",parser); ! } ! public BulletListScanner(String filter, Parser parser) { ! super(filter, MATCH_STRING, ENDERS); ! parser.addScanner(new BulletScanner("-bullet",ulli)); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new BulletList(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! public void beforeScanningStarts() { ! ulli.push(this); ! } } --- 40,68 ---- public class BulletListScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = { "UL", "OL" }; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private Stack ulli = new Stack(); ! ! public BulletListScanner(Parser parser) { ! this("",parser); ! } ! public BulletListScanner(String filter, Parser parser) { ! super(filter, MATCH_STRING, ENDERS); ! parser.addScanner(new BulletScanner("-bullet",ulli)); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new BulletList(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! public void beforeScanningStarts() { ! ulli.push(this); ! } } Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** BulletScanner.java 24 Aug 2003 21:59:42 -0000 1.18 --- BulletScanner.java 3 Sep 2003 23:36:19 -0000 1.19 *************** *** 47,89 **** */ public class BulletScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = {"LI"}; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private final static String END_TAG_ENDERS [] = { "UL" }; ! private Stack ulli; ! ! public BulletScanner(Stack ulli) { ! this("",ulli); ! } ! public BulletScanner(String filter, Stack ulli) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); ! this.ulli = ulli; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new Bullet(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! /** ! * This is the logic that decides when a bullet tag can be allowed ! */ ! public boolean shouldCreateEndTagAndExit() { ! if (ulli.size()==0) return false; ! CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) { ! ulli.pop(); ! return true; ! } else ! return false; ! } ! public void beforeScanningStarts() { ! ulli.push(this); ! } } --- 47,89 ---- */ public class BulletScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = {"LI"}; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private final static String END_TAG_ENDERS [] = { "UL" }; ! private Stack ulli; ! ! public BulletScanner(Stack ulli) { ! this("",ulli); ! } ! public BulletScanner(String filter, Stack ulli) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); ! this.ulli = ulli; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new Bullet(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! /** ! * This is the logic that decides when a bullet tag can be allowed ! */ ! public boolean shouldCreateEndTagAndExit() { ! if (ulli.size()==0) return false; ! CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) { ! ulli.pop(); ! return true; ! } else ! return false; ! } ! public void beforeScanningStarts() { ! ulli.push(this); ! } } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.63 retrieving revision 1.64 diff -C2 -d -r1.63 -r1.64 *** CompositeTagScanner.java 24 Aug 2003 21:59:42 -0000 1.63 --- CompositeTagScanner.java 3 Sep 2003 23:36:19 -0000 1.64 *************** *** 55,62 **** * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "MYTAG" }; ! * MyScanner() { ! * super(MATCH_IDS); ! * } ! * ... * } * </pre> --- 55,62 ---- * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "MYTAG" }; ! * MyScanner() { ! * super(MATCH_IDS); ! * } ! * ... * } * </pre> *************** *** 69,76 **** * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS, END_TAG_ENDERS, true); ! * } ! * ... * } * </pre> --- 69,76 ---- * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS, END_TAG_ENDERS, true); ! * } ! * ... * } * </pre> *************** *** 84,91 **** * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS,END_TAG_ENDERS, false); ! * } ! * ... * } * </pre> --- 84,91 ---- * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS,END_TAG_ENDERS, false); ! * } ! * ... * } * </pre> *************** *** 93,136 **** */ public abstract class CompositeTagScanner extends TagScanner { ! protected String [] nameOfTagToMatch; ! private boolean allowSelfChildren; ! protected Set tagEnderSet; ! private Set endTagEnderSet; ! private boolean balance_quotes; ! ! public CompositeTagScanner(String [] nameOfTagToMatch) { ! this(nameOfTagToMatch,new String[] {}); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { ! this("",nameOfTagToMatch,tagEnders); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) { ! this("",nameOfTagToMatch,tagEnders,allowSelfChildren); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {},true); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,true); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,endTagEnders, allowSelfChildren, false); --- 93,136 ---- */ public abstract class CompositeTagScanner extends TagScanner { ! protected String [] nameOfTagToMatch; ! private boolean allowSelfChildren; ! protected Set tagEnderSet; ! private Set endTagEnderSet; ! private boolean balance_quotes; ! ! public CompositeTagScanner(String [] nameOfTagToMatch) { ! this(nameOfTagToMatch,new String[] {}); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { ! this("",nameOfTagToMatch,tagEnders); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) { ! this("",nameOfTagToMatch,tagEnders,allowSelfChildren); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {},true); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,true); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,endTagEnders, allowSelfChildren, false); *************** *** 158,229 **** * within quotes. */ ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren, boolean balance_quotes) { ! super(filter); ! this.nameOfTagToMatch = nameOfTagToMatch; ! this.allowSelfChildren = allowSelfChildren; this.balance_quotes = balance_quotes; ! this.tagEnderSet = new HashSet(); ! for (int i=0;i<tagEnders.length;i++) ! tagEnderSet.add(tagEnders[i]); ! this.endTagEnderSet = new HashSet(); ! for (int i=0;i<endTagEnders.length;i++) ! endTagEnderSet.add(endTagEnders[i]); ! } ! public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! CompositeTagScannerHelper helper = ! new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); ! return helper.scan(); ! } ! /** ! * Override this method if you wish to create any data structures or do anything ! * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. ! */ ! public void beforeScanningStarts() { ! } ! ! /** ! * This method is called everytime a child to the composite is found. It is useful when we ! * need to store special children seperately. Though, all children are collected anyway into a node list. ! */ ! public void childNodeEncountered(Node node) { ! } ! /** ! * You must override this method to create the tag of your choice upon successful parsing. Data required ! * for construction of your tag can be found within tagData and compositeTagData ! */ ! public abstract Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException; ! public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; ! String tagName = tag.getTagName(); ! if ( ! ( isEndTag && endTagEnderSet.contains(tagName)) || ! (!isEndTag && tagEnderSet.contains(tagName)) ! ) ! return true; else return false; ! } ! public final boolean isAllowSelfChildren() { ! return allowSelfChildren; ! } ! /** ! * Override this method to implement scanner logic that determines if the current scanner is ! * to be allowed. This is useful when there are rules which dont allow recursive tags of the same ! * type. @see BulletScanner ! * @return boolean true/false ! */ ! public boolean shouldCreateEndTagAndExit() { ! return false; ! } } --- 158,229 ---- * within quotes. */ ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren, boolean balance_quotes) { ! super(filter); ! this.nameOfTagToMatch = nameOfTagToMatch; ! this.allowSelfChildren = allowSelfChildren; this.balance_quotes = balance_quotes; ! this.tagEnderSet = new HashSet(); ! for (int i=0;i<tagEnders.length;i++) ! tagEnderSet.add(tagEnders[i]); ! this.endTagEnderSet = new HashSet(); ! for (int i=0;i<endTagEnders.length;i++) ! endTagEnderSet.add(endTagEnders[i]); ! } ! public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! CompositeTagScannerHelper helper = ! new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); ! return helper.scan(); ! } ! /** ! * Override this method if you wish to create any data structures or do anything ! * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. ! */ ! public void beforeScanningStarts() { ! } ! ! /** ! * This method is called everytime a child to the composite is found. It is useful when we ! * need to store special children seperately. Though, all children are collected anyway into a node list. ! */ ! public void childNodeEncountered(Node node) { ! } ! /** ! * You must override this method to create the tag of your choice upon successful parsing. Data required ! * for construction of your tag can be found within tagData and compositeTagData ! */ ! public abstract Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException; ! public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; ! String tagName = tag.getTagName(); ! if ( ! ( isEndTag && endTagEnderSet.contains(tagName)) || ! (!isEndTag && tagEnderSet.contains(tagName)) ! ) ! return true; else return false; ! } ! public final boolean isAllowSelfChildren() { ! return allowSelfChildren; ! } ! /** ! * Override this method to implement scanner logic that determines if the current scanner is ! * to be allowed. This is useful when there are rules which dont allow recursive tags of the same ! * type. @see BulletScanner ! * @return boolean true/false ! */ ! public boolean shouldCreateEndTagAndExit() { ! return false; ! } } Index: DivScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DivScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** DivScanner.java 24 Aug 2003 21:59:42 -0000 1.26 --- DivScanner.java 3 Sep 2003 23:36:19 -0000 1.27 *************** *** 35,57 **** public class DivScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"DIV"}; ! ! public DivScanner() { ! this(""); ! } ! public DivScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Div(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } } --- 35,57 ---- public class DivScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"DIV"}; ! ! public DivScanner() { ! this(""); ! } ! public DivScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Div(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } } Index: DoctypeScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DoctypeScanner.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** DoctypeScanner.java 24 Aug 2003 21:59:42 -0000 1.23 --- DoctypeScanner.java 3 Sep 2003 23:36:19 -0000 1.24 *************** *** 41,65 **** public class DoctypeScanner extends TagScanner { ! public DoctypeScanner() { ! super(); ! } ! public DoctypeScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "!DOCTYPE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tag.getText(); ! tagContents=tagContents.substring(9,tagContents.length()); ! tagData.setTagContents(tagContents); ! return new DoctypeTag(tagData); ! } } --- 41,65 ---- public class DoctypeScanner extends TagScanner { ! public DoctypeScanner() { ! super(); ! } ! public DoctypeScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "!DOCTYPE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tag.getText(); ! tagContents=tagContents.substring(9,tagContents.length()); ! tagData.setTagContents(tagContents); ! return new DoctypeTag(tagData); ! } } Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** FormScanner.java 24 Aug 2003 21:59:42 -0000 1.41 --- FormScanner.java 3 Sep 2003 23:36:19 -0000 1.42 *************** *** 50,77 **** public class FormScanner extends CompositeTagScanner { ! private static final String [] MATCH_ID = { "FORM" }; ! public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; ! private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY" ! }; ! private Stack stack = new Stack(); ! /** ! * HTMLFormScanner constructor comment. ! */ ! public FormScanner(Parser parser) { ! this("", parser); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FormScanner(String filter, Parser parser) ! { ! super(filter,MATCH_ID,formTagEnders,false); ! parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t",stack)); ! parser.addScanner(new SelectTagScanner("-select", stack)); ! parser.addScanner(new OptionTagScanner("-option",stack)); ! } ! /** * Extract the location of the image, given the string to be parsed, and the url --- 50,77 ---- public class FormScanner extends CompositeTagScanner { ! private static final String [] MATCH_ID = { "FORM" }; ! public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; ! private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY" ! }; ! private Stack stack = new Stack(); ! /** ! * HTMLFormScanner constructor comment. ! */ ! public FormScanner(Parser parser) { ! this("", parser); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FormScanner(String filter, Parser parser) ! { ! super(filter,MATCH_ID,formTagEnders,false); ! parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t",stack)); ! parser.addScanner(new SelectTagScanner("-select", stack)); ! parser.addScanner(new OptionTagScanner("-option",stack)); ! } ! /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 80,179 **** * @param url URL of web page being parsed. */ ! public String extractFormLocn(Tag tag,String url) throws ParserException ! { ! try { ! String formURL= tag.getAttribute("ACTION"); ! if (formURL==null) return ""; else ! return (new LinkProcessor()).extract(formURL, url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg= tag.getText(); else msg=""; ! throw new ParserException("HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "+msg+", url = "+url,e); ! } ! } ! public String extractFormName(Tag tag) ! { ! return tag.getAttribute("NAME"); ! } ! public String extractFormMethod(Tag tag) ! { ! String method = tag.getAttribute("METHOD"); ! if (method==null) method = FormTag.GET; ! return method.toUpperCase(); ! } ! /** ! * Scan the tag and extract the information related to the <IMG> tag. The url of the ! * initiating scan has to be provided in case relative links are found. The initial ! * url is then prepended to it to give an absolute link. ! * The NodeReader is provided in order to do a lookahead operation. We assume that ! * the identification has already been performed using the evaluate() method. ! * @param tag HTML Tag to be scanned for identification ! * @param url The initiating url of the scan (Where the html page lies) ! * @param reader The reader object responsible for reading the html page ! * @param currentLine The current line (automatically provided by Tag) ! */ ! // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException ! // { ! // if (linkScannerAlreadyOpen) { ! // String newLine = insertEndTagBeforeNode(tag, currentLine); ! // reader.changeLine(newLine); ! // return new EndTag( ! // new TagData( ! // tag.elementBegin(), ! // tag.elementBegin()+3, ! // "A", ! // currentLine ! // ) ! // ); ! // } ! // return super.scan(tag,url,reader,currentLine); ! // } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! return MATCH_ID; ! } ! public boolean evaluate(String s, TagScanner previousOpenScanner) { ! if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; ! StringBuffer msg= new StringBuffer(); ! msg.append("<"); ! msg.append(s); ! msg.append(">"); ! msg.append(PREVIOUS_DIRTY_LINK_MESSAGE); ! feedback.warning(msg.toString()); ! // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - ! // Internet Explorer actually parses such tags. ! // So - we shall then proceed to fool the scanner into sending an endtag of type </A> ! // For this - set the dirty flag to true and return ! } ! else ! linkScannerAlreadyOpen = false; ! return super.evaluate(s, previousOpenScanner); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) ! compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! return new FormTag(tagData, compositeTagData); ! } ! public void beforeScanningStarts() { ! stack.push(this); ! } } --- 80,179 ---- * @param url URL of web page being parsed. */ ! public String extractFormLocn(Tag tag,String url) throws ParserException ! { ! try { ! String formURL= tag.getAttribute("ACTION"); ! if (formURL==null) return ""; else ! return (new LinkProcessor()).extract(formURL, url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg= tag.getText(); else msg=""; ! throw new ParserException("HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "+msg+", url = "+url,e); ! } ! } ! public String extractFormName(Tag tag) ! { ! return tag.getAttribute("NAME"); ! } ! public String extractFormMethod(Tag tag) ! { ! String method = tag.getAttribute("METHOD"); ! if (method==null) method = FormTag.GET; ! return method.toUpperCase(); ! } ! /** ! * Scan the tag and extract the information related to the <IMG> tag. The url of the ! * initiating scan has to be provided in case relative links are found. The initial ! * url is then prepended to it to give an absolute link. ! * The NodeReader is provided in order to do a lookahead operation. We assume that ! * the identification has already been performed using the evaluate() method. ! * @param tag HTML Tag to be scanned for identification ! * @param url The initiating url of the scan (Where the html page lies) ! * @param reader The reader object responsible for reading the html page ! * @param currentLine The current line (automatically provided by Tag) ! */ ! // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException ! // { ! // if (linkScannerAlreadyOpen) { ! // String newLine = insertEndTagBeforeNode(tag, currentLine); ! // reader.changeLine(newLine); ! // return new EndTag( ! // new TagData( ! // tag.elementBegin(), ! // tag.elementBegin()+3, ! // "A", ! // currentLine ! // ) ! // ); ! // } ! // return super.scan(tag,url,reader,currentLine); ! // } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! return MATCH_ID; ! } ! public boolean evaluate(String s, TagScanner previousOpenScanner) { ! if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; ! StringBuffer msg= new StringBuffer(); ! msg.append("<"); ! msg.append(s); ! msg.append(">"); ! msg.append(PREVIOUS_DIRTY_LINK_MESSAGE); ! feedback.warning(msg.toString()); ! // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - ! // Internet Explorer actually parses such tags. ! // So - we shall then proceed to fool the scanner into sending an endtag of type </A> ! // For this - set the dirty flag to true and return ! } ! else ! linkScannerAlreadyOpen = false; ! return super.evaluate(s, previousOpenScanner); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) ! compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! return new FormTag(tagData, compositeTagData); ! } ! public void beforeScanningStarts() { ! stack.push(this); ! } } Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FrameScanner.java 24 Aug 2003 21:59:42 -0000 1.25 --- FrameScanner.java 3 Sep 2003 23:36:19 -0000 1.26 *************** *** 49,66 **** public class FrameScanner extends TagScanner { ! /** ! * Overriding the default constructor ! */ ! public FrameScanner() ! { ! super(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FrameScanner(String filter) ! { ! super(filter); ! } /** * Extract the location of the image, given the string to be parsed, and the url --- 49,66 ---- public class FrameScanner extends TagScanner { ! /** ! * Overriding the default constructor ! */ ! public FrameScanner() ! { ! super(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FrameScanner(String filter) ! { ! super(filter); ! } /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 69,108 **** * @param url URL of web page being parsed. */ ! public String extractFrameLocn(Tag tag,String url) throws ParserException ! { ! try { ! Hashtable table = tag.getAttributes(); ! String relativeFrame = (String)table.get("SRC"); ! if (relativeFrame==null) return ""; else ! return (new LinkProcessor()).extract(relativeFrame,url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); ! } ! } ! ! public String extractFrameName(Tag tag,String url) { ! return tag.getAttribute("NAME"); ! } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "FRAME"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String frameUrl = extractFrameLocn(tag,url); ! String frameName = extractFrameName(tag,url); ! ! return new FrameTag(tagData,frameUrl,frameName); ! } } --- 69,108 ---- * @param url URL of web page being parsed. */ ! public String extractFrameLocn(Tag tag,String url) throws ParserException ! { ! try { ! Hashtable table = tag.getAttributes(); ! String relativeFrame = (String)table.get("SRC"); ! if (relativeFrame==null) return ""; else ! return (new LinkProcessor()).extract(relativeFrame,url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); ! } ! } ! ! public String extractFrameName(Tag tag,String url) { ! return tag.getAttribute("NAME"); ! } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "FRAME"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String frameUrl = extractFrameLocn(tag,url); ! String frameName = extractFrameName(tag,url); ! ! return new FrameTag(tagData,frameUrl,frameName); ! } } Index: FrameSetScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameSetScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** FrameSetScanner.java 24 Aug 2003 21:59:42 -0000 1.24 --- FrameSetScanner.java 3 Sep 2003 23:36:19 -0000 1.25 *************** *** 47,71 **** public class FrameSetScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"FRAMESET"}; ! ! public FrameSetScanner() ! { ! super(MATCH_NAME); ! } ! public FrameSetScanner(String filter) ! { ! super(filter,MATCH_NAME); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new FrameSetTag(tagData,compositeTagData); ! } } --- 47,71 ---- public class FrameSetScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"FRAMESET"}; ! ! public FrameSetScanner() ! { ! super(MATCH_NAME); ! } ! public FrameSetScanner(String filter) ! { ! super(filter,MATCH_NAME); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new FrameSetTag(tagData,compositeTagData); ! } } Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** HeadScanner.java 24 Aug 2003 21:59:42 -0000 1.11 --- HeadScanner.java 3 Sep 2003 23:36:19 -0000 1.12 *************** *** 1,61 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.scanners; ! ! import org.htmlparser.tags.HeadTag; ! import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"BODY"}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! ! public HeadScanner() { ! this(""); ! } ! ! public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new HeadTag(tagData,compositeTagData); ! } ! } --- 1,61 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.scanners; ! ! import org.htmlparser.tags.HeadTag; ! import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"BODY"}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! ! public HeadScanner() { ! this(""); ! } ! ! public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new HeadTag(tagData,compositeTagData); ! } ! } Index: HtmlScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HtmlScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** HtmlScanner.java 24 Aug 2003 21:59:42 -0000 1.26 --- HtmlScanner.java 3 Sep 2003 23:36:19 -0000 1.27 *************** *** 35,57 **** public class HtmlScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"HTML"}; ! ! public HtmlScanner() { ! this(""); ! } ! public HtmlScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Html(tagData,compositeTagData); ! } } --- 35,57 ---- public class HtmlScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"HTML"}; ! ! public HtmlScanner() { ! this(""); ! } ! public HtmlScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Html(tagData,compositeTagData); ! } } Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** ImageScanner.java 24 Aug 2003 21:59:42 -0000 1.24 --- ImageScanner.java 3 Sep 2003 23:36:20 -0000 1.25 *************** *** 48,70 **** public class ImageScanner extends TagScanner { ! public static final String IMAGE_SCANNER_ID = "IMG"; ! private Hashtable table; ! private LinkProcessor processor; ! /** ! * Overriding the default constructor ! */ ! public ImageScanner() ! { ! super(); ! processor = new LinkProcessor(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public ImageScanner(String filter,LinkProcessor processor) ! { ! super(filter); ! this.processor = processor; ! } /** * Extract the location of the image, given the string to be parsed, and the url --- 48,70 ---- public class ImageScanner extends TagScanner { ! public static final String IMAGE_SCANNER_ID = "IMG"; ! private Hashtable table; ! private LinkProcessor processor; ! /** ! * Overriding the default constructor ! */ ! public ImageScanner() ! { ! super(); ! processor = new LinkProcessor(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public ImageScanner(String filter,LinkProcessor processor) ! { ! super(filter); ! this.processor = processor; ! } /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 73,117 **** * @param url URL of web page being parsed. */ ! public String extractImageLocn(Tag tag,String url) throws ParserException ! { ! String relativeLink=null; ! try { ! table = tag.getAttributes(); ! relativeLink = (String)table.get("SRC"); ! if (relativeLink!=null) { ! relativeLink = ParserUtils.removeChars(relativeLink,'\n'); ! relativeLink = ParserUtils.removeChars(relativeLink,'\r'); ! } ! if (relativeLink==null || relativeLink.length()==0) { ! // try fix ! String tagText = tag.getText().toUpperCase(); ! int indexSrc = tagText.indexOf("SRC"); ! if (indexSrc != -1) { ! // There is a missing equals. ! tag.setText(tag.getText().substring(0,indexSrc+3)+"="+tag.getText().substring(indexSrc+3,tag.getText().length())); ! table = tag.redoParseAttributes(); ! relativeLink = (String) table.get("SRC"); ! ! } ! } ! if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); ! } ! catch (Exception e) { ! throw new ParserException("HTMLImageScanner.extractImageLocn() : Error in extracting image location, relativeLink = "+relativeLink+", url = "+url,e); ! } ! } ! ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = IMAGE_SCANNER_ID; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String link = extractImageLocn(tag,url); ! return new ImageTag(tagData, link); ! } } --- 73,117 ---- * @param url URL of web page being parsed. */ ! public String extractImageLocn(Tag tag,String url) throws ParserException ! { ! String relativeLink=null; ! try { ! table = tag.getAttributes(); ! relativeLink = (String)table.get("SRC"); ! if (relativeLink!=null) { ! relativeLink = ParserUtils.removeChars(relativeLink,'\n'); ! relativeLink = ParserUtils.removeChars(relativeLink,'\r'); ! } ! if (relativeLink==null || relativeLink.length()==0) { ! // try fix ! String tagText = tag.getText().toUpperCase(); ! int indexSrc = tagText.indexOf("SRC"); ! if (indexSrc != -1) { ! // There is a missing equals. ! tag.setText(tag.getText().substring(0,indexSrc+3)+"="+tag.getText().substring(indexSrc+3,tag.getText().length())); ! table = tag.redoParseAttributes(); ! relativeLink = (String) table.get("SRC"); ! ! } ! } ! if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); ! } ! catch (Exception e) { ! throw new ParserException("HTMLImageScanner.extractImageLocn() : Error in extracting image location, relativeLink = "+relativeLink+", url = "+url,e); ! } ! } ! ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = IMAGE_SCANNER_ID; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String link = extractImageLocn(tag,url); ! return new ImageTag(tagData, link); ! } } Index: InputTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/InputTagScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** InputTagScanner.java 24 Aug 2003 21:59:42 -0000 1.22 --- InputTagScanner.java 3 Sep 2003 23:36:20 -0000 1.23 *************** *** 36,59 **** public class InputTagScanner extends TagScanner { ! public InputTagScanner() ! { ! super(); ! } ! ! public InputTagScanner(String filter) ! { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "INPUT"; ! return ids; ! } ! ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! return new InputTag(tagData); ! } } --- 36,59 ---- public class InputTagScanner extends TagScanner { ! public InputTagScanner() ! { ! super(); ! } ! ! public InputTagScanner(String filter) ! { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "INPUT"; ! return ids; ! } ! ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! return new InputTag(tagData); ! } } Index: JspScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/JspScanner.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** JspScanner.java 24 Aug 2003 21:59:42 -0000 1.23 --- JspScanner.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 40,65 **** public class JspScanner extends TagScanner { ! public JspScanner() { ! super(); ! } ! public JspScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[3]; ! ids[0] = "%"; ! ids[1] = "%="; ! ids[2] = "%@"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tagData.getTagContents(); ! tagData.setTagContents(tagContents.substring(1,tagContents.length()-1)); ! return new JspTag(tagData); ! } } --- 40,65 ---- public class JspScanner extends TagScanner { ! public JspScanner() { ! super(); ! } ! public JspScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[3]; ! ids[0] = "%"; ! ids[1] = "%="; ! ids[2] = "%@"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tagData.getTagContents(); ! tagData.setTagContents(tagContents.substring(1,tagContents.length()-1)); ! return new JspTag(tagData); ! } } Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** LabelScanner.java 24 Aug 2003 21:59:42 -0000 1.29 --- LabelScanner.java 3 Sep 2003 23:36:20 -0000 1.30 *************** *** 38,59 **** public class LabelScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"LABEL"}; ! public LabelScanner() { ! super(MATCH_NAME,new String [] {},false); ! } ! ! public LabelScanner(String filter) { ! super(filter,MATCH_NAME,new String [] {},false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new LabelTag(tagData,compositeTagData); ! } } --- 38,59 ---- public class LabelScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"LABEL"}; ! public LabelScanner() { ! super(MATCH_NAME,new String [] {},false); ! } ! ! public LabelScanner(String filter) { ! super(filter,MATCH_NAME,new String [] {},false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new LabelTag(tagData,compositeTagData); ! } } Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** LinkScanner.java 24 Aug 2003 21:59:42 -0000 1.49 --- LinkScanner.java 3 Sep 2003 23:36:20 -0000 1.50 *************** *** 51,121 **** public class LinkScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"A"}; ! public static final String LINK_SCANNER_ID = "A"; ! public static final String DIRTY_TAG_MESSAGE=" is a dirty link tag - the tag was not closed. \nWe encountered an open tag, before the previous end tag was found.\nCorrecting this.."; ! private LinkProcessor processor; ! private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! ! /** ! * Overriding the default constructor ! */ ! public LinkScanner() { ! this(""); ! } ! ! /** ! * Overriding the constructor to accept the filter ! */ ! public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) throws ParserException { ! String link = extractLink(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! int mailto = link.indexOf("mailto"); ! boolean mailLink=false; ! if (mailto==0) ! { ! // yes it is ! mailto = link.indexOf(":"); ! link = link.substring(mailto+1); ! mailLink = true; ! } ! int javascript = link.indexOf("javascript:"); ! boolean javascriptLink = false; ! if (javascript == 0) { ! link = link.substring(11); // this magic number is "javascript:".length() ! javascriptLink = true; ! } ! String accessKey = getAccessKey(compositeTagData.getStartTag()); ! String myLinkText = compositeTagData.getChildren().toString(); ! ! LinkTag linkTag = new LinkTag( ! tagData, ! compositeTagData, ! new LinkData( ! link, ! myLinkText, ! accessKey, ! mailLink, ! javascriptLink ! ) ! ); ! linkTag.setThisScanner(this); ! return linkTag; ! } ! ! /** ! * Template Method, used to decide if this scanner can handle the Link tag type. If ! * the evaluation returns true, the calling side makes a call to scan(). ! * @param s The complete text contents of the Tag. ! * @param previousOpenScanner Indicates any previous scanner which hasnt completed, before the current ! * scan has begun, and hence allows us to write scanners that can work with dirty html ! */ public boolean evaluate (String s, TagScanner previousOpenScanner) { --- 51,121 ---- public class LinkScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"A"}; ! public static final String LINK_SCANNER_ID = "A"; ! public static final String DIRTY_TAG_MESSAGE=" is a dirty link tag - the tag was not closed. \nWe encountered an open tag, before the previous end tag was found.\nCorrecting this.."; ! private LinkProcessor processor; ! private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! ! /** ! * Overriding the default constructor ! */ ! public LinkScanner() { ! this(""); ! } ! ! /** ! * Overriding the constructor to accept the filter ! */ ! public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) throws ParserException { ! String link = extractLink(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! int mailto = link.indexOf("mailto"); ! boolean mailLink=false; ! if (mailto==0) ! { ! // yes it is ! mailto = link.indexOf(":"); ! link = link.substring(mailto+1); ! mailLink = true; ! } ! int javascript = link.indexOf("javascript:"); ! boolean javascriptLink = false; ! if (javascript == 0) { ! link = link.substring(11); // this magic number is "javascript:".length() ! javascriptLink = true; ! } ! String accessKey = getAccessKey(compositeTagData.getStartTag()); ! String myLinkText = compositeTagData.getChildren().toString(); ! ! LinkTag linkTag = new LinkTag( ! tagData, ! compositeTagData, ! new LinkData( ! link, ! myLinkText, ! accessKey, ! mailLink, ! javascriptLink ! ) ! ); ! linkTag.setThisScanner(this); ! return linkTag; ! } ! ! /** ... [truncated message content] |