[Htmlparser-cvs] htmlparser/src/org/htmlparser AbstractNode.java,1.18,1.19 Node.java,1.42,1.43 Parser.java,1.72,1.73
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-11-01 21:55:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv2656 Modified Files: AbstractNode.java Node.java Parser.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** AbstractNode.java 26 Oct 2003 19:46:17 -0000 1.18 --- AbstractNode.java 1 Nov 2003 21:55:42 -0000 1.19 *************** *** 308,310 **** --- 308,317 ---- } + /** + * Perform the meaning of this tag. + * The default action is to do nothing. + */ + public void doSemanticAction () + { + } } Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** Node.java 26 Oct 2003 19:46:17 -0000 1.42 --- Node.java 1 Nov 2003 21:55:42 -0000 1.43 *************** *** 193,195 **** --- 193,206 ---- */ public void setText(String text); + + /** + * Perform the meaning of this tag. + * This is defined by the tag, for example the bold tag <B> may switch + * bold text on and off. + * Only a few tags have semantic meaning to the parser. These have to do + * with the character set to use (<META>), the base URL to use + * (<BASE>). Other than that, the semantic meaning is up to the + * application. 
+ */ + public void doSemanticAction (); } Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.72 retrieving revision 1.73 diff -C2 -d -r1.72 -r1.73 *** Parser.java 29 Oct 2003 03:31:17 -0000 1.72 --- Parser.java 1 Nov 2003 21:55:42 -0000 1.73 *************** *** 37,40 **** --- 37,41 ---- import java.util.HashMap; import java.util.Hashtable; + import java.util.Iterator; import java.util.Map; import java.util.Vector; *************** *** 43,46 **** --- 44,48 ---- import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; + import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; *************** *** 52,55 **** --- 54,58 ---- import org.htmlparser.scanners.BodyScanner; import org.htmlparser.scanners.BulletListScanner; + import org.htmlparser.scanners.CompositeTagScanner; import org.htmlparser.scanners.DivScanner; import org.htmlparser.scanners.DoctypeScanner; *************** *** 192,197 **** * The list of scanners to apply at the top level. */ ! protected Map scanners; ! /** * The current scanner when recursing into a tag. --- 195,206 ---- * The list of scanners to apply at the top level. */ ! protected Map mScanners; ! ! /** ! * The list of tags to return at the top level. ! * The list is keyed by tag name. ! */ ! protected Map mBlastocyst; ! /** * The current scanner when recursing into a tag. *************** *** 376,380 **** * Set the connection for this parser. * This method creates a new <code>Lexer</code> reading from the connection. ! * It does not adjust the <code>scanners</code> list * or <code>feedback</code> object. Trying to * set the connection to null is a noop. --- 385,389 ---- * Set the connection for this parser. * This method creates a new <code>Lexer</code> reading from the connection. ! 
* It does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. Trying to * set the connection to null is a noop. *************** *** 407,411 **** * Set the URL for this parser. * This method creates a new Lexer reading from the given URL. ! * It does not adjust the <code>scanners</code> list * or <code>feedback</code> object. Trying to set the url to null or an * empty string is a noop. --- 416,420 ---- * Set the URL for this parser. * This method creates a new Lexer reading from the given URL. ! * It does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. Trying to set the url to null or an * empty string is a noop. *************** *** 453,457 **** /** * Set the lexer for this parser. ! * TIt does not adjust the <code>scanners</code> list * or <code>feedback</code> object. * Trying to set the lexer to <code>null</code> is a noop. --- 462,466 ---- /** * Set the lexer for this parser. ! * TIt does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. * Trying to set the lexer to <code>null</code> is a noop. *************** *** 477,502 **** /** ! * Get the number of scanners registered currently in the scanner. ! * @return int number of scanners registered */ ! public int getNumScanners() { ! return scanners.size(); } /** * This method is to be used to change the set of scanners in the current parser. ! * @param newScanners Vector holding scanner objects to be used during the parsing process. */ ! public void setScanners(Map newScanners) { ! scanners = (null == newScanners) ? new HashMap() : newScanners; } /** ! * Get an enumeration of scanners registered currently in the parser ! * @return Enumeration of scanners currently registered in the parser */ ! public Map getScanners() { ! return scanners; } --- 486,522 ---- /** ! * Get the number of scanners registered currently in the parser. ! * @return int number of scanners registered. */ ! public int getNumScanners() ! { ! 
return mScanners.size(); } /** * This method is to be used to change the set of scanners in the current parser. ! * @param newScanners List of scanner objects to be used during the parsing process. */ ! public void setScanners (Map newScanners) { ! Iterator iterator; ! TagScanner scanner; ! ! flushScanners (); ! if (null != newScanners) ! for (iterator = newScanners.entrySet ().iterator (); iterator.hasNext (); ) ! { ! scanner = (TagScanner)iterator.next (); ! addScanner (scanner); ! } } /** ! * Get the list of scanners registered currently in the parser ! * @return List of scanners currently registered in the parser */ ! public Map getScanners() ! { ! return mScanners; } *************** *** 536,546 **** * It is advantageous to register only the scanners you want, in order to achieve faster parsing speed. This method * would also be of use when you have developed custom scanners, and need to register them into the parser. ! * @param scanner TagScanner object (or derivative) to be added to the list of registered scanners */ public void addScanner(TagScanner scanner) { ! String ids[] = scanner.getID(); ! for (int i=0;i<ids.length;i++) { ! scanners.put(ids[i],scanner); } } --- 556,590 ---- * It is advantageous to register only the scanners you want, in order to achieve faster parsing speed. This method * would also be of use when you have developed custom scanners, and need to register them into the parser. ! * @param scanner TagScanner object (or derivative) to be added to the list of registered scanners. */ public void addScanner(TagScanner scanner) { ! String ids[]; ! Tag tag; ! ! ids = scanner.getID(); ! for (int i = 0; i < ids.length; i++) ! { ! mScanners.put (ids[i], scanner); ! // for now, the only way to create a tag is to ask the scanner... ! try ! { ! if (scanner instanceof CompositeTagScanner) ! { ! tag = ((CompositeTagScanner)scanner).createTag (null, 0, 0, null, null, null, null); ! tag.setThisScanner (scanner); ! mBlastocyst.put (ids[i], tag); ! } ! 
else ! { ! tag = scanner.createTag (null, 0, 0, null, null, null); ! tag.setThisScanner (scanner); ! mBlastocyst.put (ids[i], tag); ! } ! } ! catch (Exception e) ! { ! e.printStackTrace (); ! } } } *************** *** 585,589 **** try { ! if (null == scanners.get ("META")) { addScanner (new MetaTagScanner ("-m")); --- 629,633 ---- try { ! if (null == mScanners.get ("META")) { addScanner (new MetaTagScanner ("-m")); *************** *** 626,630 **** { if (remove_scanner) ! scanners.remove ("META"); } --- 670,674 ---- { if (remove_scanner) ! mScanners.remove ("META"); } *************** *** 633,640 **** /** ! * Flush the current scanners registered. The registered scanners list becomes empty with this call. */ ! public void flushScanners() { ! scanners = new Hashtable(); } --- 677,687 ---- /** ! * Flush the current scanners registered. ! * The registered scanners list becomes empty with this call. */ ! public void flushScanners() ! { ! mScanners = new Hashtable (); ! mBlastocyst = new Hashtable (); } *************** *** 645,650 **** * @return TagScanner The Tag Scanner */ ! public TagScanner getScanner(String id) { ! return (TagScanner)scanners.get(id); } --- 692,707 ---- * @return TagScanner The Tag Scanner */ ! public TagScanner getScanner (String id) ! { ! Tag tag; ! TagScanner ret; ! ! ret = null; ! ! tag = (Tag)mBlastocyst.get (id); ! if (null != tag) ! ret = (TagScanner)tag.getThisScanner (); ! ! return (ret); } *************** *** 686,690 **** /** ! * This method should be invoked in order to register some common scanners. The scanners that get added are : <br> * LinkScanner (filter key "-l")<br> * ImageScanner (filter key "-i")<br> --- 743,748 ---- /** ! * This method should be invoked in order to register some common scanners. ! * The scanners that get added are : <br> * LinkScanner (filter key "-l")<br> * ImageScanner (filter key "-i")<br> *************** *** 709,715 **** */ public void registerScanners() { ! 
if (scanners.size()>0) { System.err.println("registerScanners() should be called first, when no other scanner has been registered."); ! System.err.println("Other scanners already exist, hence this method call wont have any effect"); return; } --- 767,774 ---- */ public void registerScanners() { ! if (mScanners.size()>0) ! { System.err.println("registerScanners() should be called first, when no other scanner has been registered."); ! System.err.println("Other scanners already exist, hence this method call won't have any effect"); return; } *************** *** 758,763 **** * @param scanner TagScanner object to be removed from the list of registered scanners */ ! public void removeScanner(TagScanner scanner) { ! scanners.remove(scanner.getID()[0]); } --- 817,830 ---- * @param scanner TagScanner object to be removed from the list of registered scanners */ ! public void removeScanner(TagScanner scanner) ! { ! String[] ids; ! ! ids = scanner.getID (); ! for (int i = 0; i < ids.length; i++) ! { ! mScanners.remove (ids[i]); ! mBlastocyst.remove (ids[i]); ! } } *************** *** 1029,1033 **** ParserException { ! return (new Tag (page, start, end, attributes)); } } --- 1096,1140 ---- ParserException { ! Attribute attribute; ! String id; ! Tag prototype; ! Tag ret; ! ! ret = null; ! ! if (0 != attributes.size ()) ! { ! attribute = (Attribute)attributes.elementAt (0); ! id = attribute.getName (); ! if (null != id) ! { ! try ! { ! id = id.toUpperCase (); ! if (!id.startsWith ("/")) ! { ! if (id.endsWith ("/")) ! id = id.substring (0, id.length () - 1); ! prototype = (Tag)mBlastocyst.get (id); ! if (null != prototype) ! { ! ret = (Tag)prototype.clone (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! } ! } ! } ! catch (CloneNotSupportedException cnse) ! { ! // default to creating a new one ! } ! } ! } ! if (null == ret) ! ret = new Tag (page, start, end, attributes); ! ! return (ret); } } |