[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners BaseHrefScanner.java,1.28,1.29 FormScanner.java,1.51,1.52 ImageScanner.java,1.32,1.33 LinkScanner.java,1.58,1.59
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-10-29 03:31:28
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv12839/src/org/htmlparser/scanners Modified Files: BaseHrefScanner.java FormScanner.java ImageScanner.java LinkScanner.java Log Message: Move LinkProcessor out of scanners and into Page, untangling A, IMG and BASE scanners. Move form action determination to tag. The scanners have no special actions on behalf of tags anymore. Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** BaseHrefScanner.java 26 Oct 2003 19:46:19 -0000 1.28 --- BaseHrefScanner.java 29 Oct 2003 03:31:17 -0000 1.29 *************** *** 33,37 **** import org.htmlparser.tags.BaseHrefTag; import org.htmlparser.tags.Tag; - import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; --- 33,36 ---- *************** *** 43,48 **** public class BaseHrefScanner extends TagScanner { - private LinkProcessor processor; - public BaseHrefScanner() { --- 42,45 ---- *************** *** 50,57 **** } ! public BaseHrefScanner(String filter,LinkProcessor processor) { super(filter); - this.processor = processor; } --- 47,53 ---- } ! public BaseHrefScanner(String filter) { super(filter); } *************** *** 73,81 **** ret.setAttributesEx (attributes); - // special step here - // Need to set the base url for the current link processor, - // which can't be done in the tag because it doesn't have it. 
- processor.setBaseUrl (ret.getBaseUrl ()); - return (ret); } --- 69,72 ---- Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** FormScanner.java 28 Oct 2003 12:54:21 -0000 1.51 --- FormScanner.java 29 Oct 2003 03:31:17 -0000 1.52 *************** *** 70,106 **** /** - * Extract the location of the image, given the tag, and the url - * of the html page in which this tag exists. - * @param tag The form tag with the 'ACTION' attribute. - * @param url URL of web page being parsed. - */ - public String extractFormLocn(Tag tag,String url) throws ParserException - { - try { - String formURL= tag.getAttribute("ACTION"); - if (formURL==null) return ""; else - return (new LinkProcessor()).extract(formURL, url); - } - catch (Exception e) { - String msg; - if (tag!=null) msg= tag.getText(); else msg=""; - throw new ParserException("HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "+msg+", url = "+url,e); - } - } - - public String extractFormName(Tag tag) - { - return tag.getAttribute("NAME"); - } - - public String extractFormMethod(Tag tag) - { - String method = tag.getAttribute("METHOD"); - if (method==null) method = FormTag.GET; - return method.toUpperCase(); - - } - - /** * @see org.htmlparser.scanners.TagScanner#getID() */ --- 70,73 ---- *************** *** 122,131 **** ret.setEndTag (endTag); ret.setChildren (children); - - // special step here... - // ... is it true that without an ACTION the default is to send it back to the same page? 
- String formUrl = extractFormLocn(startTag, page.getUrl ()); - if (formUrl!=null && formUrl.length()>0) - startTag.setAttribute("ACTION",formUrl); return (ret); --- 89,92 ---- Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ImageScanner.java 26 Oct 2003 19:46:20 -0000 1.32 --- ImageScanner.java 29 Oct 2003 03:31:17 -0000 1.33 *************** *** 39,45 **** import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.Tag; - import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserUtils; /** * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a --- 39,45 ---- import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserUtils; + /** * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a *************** *** 51,55 **** { public static final String IMAGE_SCANNER_ID = "IMG"; ! private LinkProcessor processor; /** * Overriding the default constructor --- 51,55 ---- { public static final String IMAGE_SCANNER_ID = "IMG"; ! /** * Overriding the default constructor *************** *** 58,70 **** { super(); - processor = new LinkProcessor(); } /** * Overriding the constructor to accept the filter */ ! public ImageScanner(String filter,LinkProcessor processor) { super(filter); - this.processor = processor; } --- 58,69 ---- { super(); } + /** * Overriding the constructor to accept the filter */ ! public ImageScanner(String filter) { super(filter); } *************** *** 84,100 **** ret.setEndPosition (end); ret.setAttributesEx (attributes); - - // special step here... 
- // Need to update the imageURL string in the image tag, - // but not the SRC attribute which it does when you set the ImageURL - // property. Can't do it in the tag, because the tag doesn't have the - // current link processor object which might have a BASE href different - // than the page. - String src = ret.getAttribute ("SRC"); - ret.setImageURL (processor.extract (ret.getImageURL (), page.getUrl ())); - if (null == src) - ret.removeAttribute ("SRC"); - else - ret.setAttribute ("SRC", src); return (ret); --- 83,86 ---- Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.58 retrieving revision 1.59 diff -C2 -d -r1.58 -r1.59 *** LinkScanner.java 28 Oct 2003 10:31:02 -0000 1.58 --- LinkScanner.java 29 Oct 2003 03:31:17 -0000 1.59 *************** *** 53,57 **** private static final String MATCH_NAME [] = {"A"}; public static final String LINK_SCANNER_ID = "A"; - public LinkProcessor processor; private final static String ENDERS [] = { "A","TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; --- 53,56 ---- *************** *** 69,73 **** public LinkScanner(String filter) { super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS); - processor = new LinkProcessor(); } --- 68,71 ---- *************** *** 99,110 **** } - public BaseHrefScanner createBaseHREFScanner(String filter) { - return new BaseHrefScanner(filter,processor); - } - - public ImageScanner createImageScanner(String filter) { - return new ImageScanner(filter,processor); - } - /** * @see org.htmlparser.scanners.TagScanner#getID() --- 97,100 ---- *************** *** 113,116 **** return MATCH_NAME; } - } --- 103,105 ---- |