[Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.69,1.70
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-10-28 03:05:42
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv19975 Modified Files: Parser.java Log Message: Moved the recursion from the NodeFactory to the CompositeTagScanner where it belongs. Also needed to kick off the recursion in IteratorImpl. The scanner is obtained in a kludgy way -- just 'til tags know their own scanners. Also fixed the other NodeFactory signatures to have a Page rather than a Lexer. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.69 retrieving revision 1.70 diff -C2 -d -r1.69 -r1.70 *** Parser.java 27 Oct 2003 02:18:02 -0000 1.69 --- Parser.java 28 Oct 2003 03:04:17 -0000 1.70 *************** *** 981,993 **** /** * Create a new string node. ! * @param lexer The lexer parsing this string. * @param start The beginning position of the string. * @param end The ending positiong of the string. */ ! public Node createStringNode (Lexer lexer, int start, int end) { Node ret; ! ret = new StringNode (lexer.getPage (), start, end); if (null != stringNodeFactory) { --- 981,993 ---- /** * Create a new string node. ! * @param page The page the node is on. * @param start The beginning position of the string. * @param end The ending positiong of the string. */ ! public Node createStringNode (Page page, int start, int end) { Node ret; ! ret = new StringNode (page, start, end); if (null != stringNodeFactory) { *************** *** 1005,1015 **** /** * Create a new remark node. ! * @param lexer The lexer parsing this remark. * @param start The beginning position of the remark. * @param end The ending positiong of the remark. */ ! public Node createRemarkNode (Lexer lexer, int start, int end) { ! return (new RemarkNode (lexer.getPage (), start, end)); } --- 1005,1015 ---- /** * Create a new remark node. ! * @param page The page the node is on. 
* @param start The beginning position of the remark. * @param end The ending positiong of the remark. */ ! public Node createRemarkNode (Page page, int start, int end) { ! return (new RemarkNode (page, start, end)); } *************** *** 1020,1091 **** * This can be used to decide which type of node to create, or * gate other processing that may be appropriate. ! * @param lexer The lexer parsing this tag. * @param start The beginning position of the tag. * @param end The ending positiong of the tag. * @param attributes The attributes contained in this tag. */ ! public Node createTagNode (Lexer lexer, int start, int end, Vector attributes) ! throws ! ParserException ! { ! return (new Tag (lexer.getPage (), start, end, attributes)); ! } ! ! /** ! * Scan a new tag node. ! * Provides composite tags the opportunity to collect their children by ! * scanning forward using the same lexer that created the composite tag. ! * On isolating a tag, processing in the lexer is: ! * <pre><code> ! * Node node = getNodeFactory ().createTagNode (this, begin, end, attributes); ! * node = getNodeFactory ().scanTagNode (this, node); ! * </code></pre> ! * This two step process, allows a node factory to only handle node ! * creation if it wishes, and delegate the recursion and scanning of child ! * nodes to the original factory. ! * Without giving too much implementation details, the low level lexer node ! * factory simply returns the same tag, while the higher level parser node ! * factory checks for a scanner registered for the node type and if there ! * is one, calls the scanner to create the specific type of node, which ! * advances the lexer past the children of the node. ! * @param lexer The lexer that parsed this tag. ! * @param tag The tag (just) created by createTagNode. Although this is ! * of type Node, it can safely be cast to the type returned by ! * {@link #createTagNode createTagNode}. ! * @return Either the same node or a new node containing children. ! 
* In any case the lexer should be positioned to proceed with the isolation ! * of the next unknown node. ! */ ! public Node scanTagNode (Lexer lexer, Node tag) throws ParserException { ! String name; ! TagScanner save; ! TagScanner scanner; ! Tag ret; ! ! ret = (Tag)tag; ! if (!ret.isEndTag ()) ! { ! // now recurse if there is a scanner for this type of tag ! name = ret.getTagName (); ! scanner = (TagScanner)scanners.get (name); ! save = mScanner; ! if ((null != scanner) && scanner.evaluate (ret, save)) ! { ! mScanner = scanner; ! try ! { ! ret = scanner.createScannedNode (ret, lexer.getPage ().getUrl (), lexer); ! } ! finally ! { ! mScanner = save; ! } ! } ! } ! ! return (ret); } } --- 1020,1033 ---- * This can be used to decide which type of node to create, or * gate other processing that may be appropriate. ! * @param page The page the node is on. * @param start The beginning position of the tag. * @param end The ending positiong of the tag. * @param attributes The attributes contained in this tag. */ ! public Node createTagNode (Page page, int start, int end, Vector attributes) throws ParserException { ! return (new Tag (page, start, end, attributes)); } } |