[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes RemarkNode.java,1.1,1.2 StringNode.java,1
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-08-23 17:50:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv20167/lexer/nodes Modified Files: RemarkNode.java StringNode.java TagNode.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compilable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jarring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RemarkNode.java 17 Aug 2003 16:09:28 -0000 1.1 --- RemarkNode.java 23 Aug 2003 17:14:45 -0000 1.2 *************** *** 33,37 **** import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; - import org.htmlparser.visitors.NodeVisitor; /** --- 33,36 ---- *************** *** 86,92 **** } ! public void accept(NodeVisitor visitor) { ! // todo: fix this ! // visitor.visitRemarkNode(this); } } --- 85,89 ---- } ! 
public void accept(Object visitor) { } } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** StringNode.java 17 Aug 2003 16:09:28 -0000 1.1 --- StringNode.java 23 Aug 2003 17:14:45 -0000 1.2 *************** *** 34,38 **** import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; - import org.htmlparser.visitors.NodeVisitor; /** --- 34,37 ---- *************** *** 106,113 **** } ! public void accept (NodeVisitor visitor) { - // todo: fix this - // visitor.visitStringNode (this); } } --- 105,110 ---- } ! public void accept (Object visitor) { } } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TagNode.java 21 Aug 2003 01:52:23 -0000 1.2 --- TagNode.java 23 Aug 2003 17:14:45 -0000 1.3 *************** *** 30,48 **** import java.util.Enumeration; - import java.util.HashSet; import java.util.Hashtable; - import java.util.Map; import java.util.Vector; - import org.htmlparser.lexer.Cursor; import org.htmlparser.lexer.Page; import org.htmlparser.parserHelper.SpecialHashtable; - import org.htmlparser.parserHelper.TagParser; - import org.htmlparser.scanners.TagScanner; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ! import org.htmlparser.visitors.NodeVisitor; /** ! * Tag represents a generic tag. This class allows users to register specific * tag scanners, which can identify links, or image references. This tag asks the * scanners to run over the text, and identify. 
It can be used to dynamically --- 30,44 ---- import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; import org.htmlparser.lexer.Cursor; + import org.htmlparser.lexer.Page; import org.htmlparser.parserHelper.SpecialHashtable; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ! /** ! * TagNode represents a generic tag. This class allows users to register specific * tag scanners, which can identify links, or image references. This tag asks the * scanners to run over the text, and identify. It can be used to dynamically *************** *** 63,67 **** private final static String EMPTY_STRING=""; - private static TagParser tagParser; private boolean emptyXmlTag = false; --- 59,62 ---- *************** *** 72,118 **** protected Vector mAttributes; - /** - * Scanner associated with this tag (useful for extraction of filtering data from a - * HTML node) - */ - protected TagScanner thisScanner = null; - /** * Set of tags that breaks the flow. */ ! protected static HashSet breakTags; static { ! breakTags = new HashSet (30); ! breakTags.add ("BLOCKQUOTE"); ! breakTags.add ("BODY"); ! breakTags.add ("BR"); ! breakTags.add ("CENTER"); ! breakTags.add ("DD"); ! breakTags.add ("DIR"); ! breakTags.add ("DIV"); ! breakTags.add ("DL"); ! breakTags.add ("DT"); ! breakTags.add ("FORM"); ! breakTags.add ("H1"); ! breakTags.add ("H2"); ! breakTags.add ("H3"); ! breakTags.add ("H4"); ! breakTags.add ("H5"); ! breakTags.add ("H6"); ! breakTags.add ("HEAD"); ! breakTags.add ("HR"); ! breakTags.add ("HTML"); ! breakTags.add ("ISINDEX"); ! breakTags.add ("LI"); ! breakTags.add ("MENU"); ! breakTags.add ("NOFRAMES"); ! breakTags.add ("OL"); ! breakTags.add ("P"); ! breakTags.add ("PRE"); ! breakTags.add ("TD"); ! breakTags.add ("TH"); ! breakTags.add ("TITLE"); ! breakTags.add ("UL"); } --- 67,107 ---- protected Vector mAttributes; /** * Set of tags that breaks the flow. */ ! protected static Hashtable breakTags; static { ! 
breakTags = new Hashtable (30); ! breakTags.put ("BLOCKQUOTE", Boolean.TRUE); ! breakTags.put ("BODY", Boolean.TRUE); ! breakTags.put ("BR", Boolean.TRUE); ! breakTags.put ("CENTER", Boolean.TRUE); ! breakTags.put ("DD", Boolean.TRUE); ! breakTags.put ("DIR", Boolean.TRUE); ! breakTags.put ("DIV", Boolean.TRUE); ! breakTags.put ("DL", Boolean.TRUE); ! breakTags.put ("DT", Boolean.TRUE); ! breakTags.put ("FORM", Boolean.TRUE); ! breakTags.put ("H1", Boolean.TRUE); ! breakTags.put ("H2", Boolean.TRUE); ! breakTags.put ("H3", Boolean.TRUE); ! breakTags.put ("H4", Boolean.TRUE); ! breakTags.put ("H5", Boolean.TRUE); ! breakTags.put ("H6", Boolean.TRUE); ! breakTags.put ("HEAD", Boolean.TRUE); ! breakTags.put ("HR", Boolean.TRUE); ! breakTags.put ("HTML", Boolean.TRUE); ! breakTags.put ("ISINDEX", Boolean.TRUE); ! breakTags.put ("LI", Boolean.TRUE); ! breakTags.put ("MENU", Boolean.TRUE); ! breakTags.put ("NOFRAMES", Boolean.TRUE); ! breakTags.put ("OL", Boolean.TRUE); ! breakTags.put ("P", Boolean.TRUE); ! breakTags.put ("PRE", Boolean.TRUE); ! breakTags.put ("TD", Boolean.TRUE); ! breakTags.put ("TH", Boolean.TRUE); ! breakTags.put ("TITLE", Boolean.TRUE); ! breakTags.put ("UL", Boolean.TRUE); } *************** *** 132,145 **** /** - * Locate the tag withing the input string, by parsing from the given position - * @param reader HTML reader to be provided so as to allow reading of next line - * @param input Input String - * @param position Position to start parsing from - */ - // public static Tag find(NodeReader reader,String input,int position) { - // return tagParser.find(reader,input,position); - // } - - /** * In case the tag is parsed at the scan method this will return value of a * parameter not implemented yet --- 121,124 ---- *************** *** 202,206 **** // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! 
ret.put (org.htmlparser.tags.Tag.TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) --- 181,185 ---- // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! ret.put (TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) *************** *** 235,239 **** } else ! ret.put (org.htmlparser.tags.Tag.TAGNAME, ""); return (ret); --- 214,218 ---- } else ! ret.put (TAGNAME, ""); return (ret); *************** *** 253,336 **** /** - * Return the scanner associated with this tag. - */ - public TagScanner getThisScanner() - { - return thisScanner; - } - - /** - * Extract the first word from the given string. - * Words are delimited by whitespace or equals signs. - * @param s The string to get the word from. - * @return The first word. - */ - // public static String extractWord (String s) - // { - // int length; - // boolean parse; - // char ch; - // StringBuffer ret; - // - // length = s.length (); - // ret = new StringBuffer (length); - // parse = true; - // for (int i = 0; i < length && parse; i++) - // { - // ch = s.charAt (i); - // if (Character.isWhitespace (ch) || ch == '=') - // parse = false; - // else - // ret.append (Character.toUpperCase (ch)); - // } - // - // return (ret.toString ()); - // } - - /** - * Scan the tag to see using the registered scanners, and attempt identification. - * @param url URL at which HTML page is located - * @param reader The NodeReader that is to be used for reading the url - */ - // public AbstractNode scan(Map scanners,String url,NodeReader reader) throws ParserException - // { - // if (tagContents.length()==0) return this; - // try { - // boolean found=false; - // AbstractNode retVal=null; - // // Find the first word in the scanners - // String firstWord = extractWord(tagContents.toString()); - // // Now, get the scanner associated with this. 
- // TagScanner scanner = (TagScanner)scanners.get(firstWord); - // - // // Now do a deep check - // if (scanner != null && - // scanner.evaluate( - // tagContents.toString(), - // reader.getPreviousOpenScanner() - // ) - // ) - // { - // found=true; - // TagScanner save; - // save = reader.getPreviousOpenScanner (); - // reader.setPreviousOpenScanner(scanner); - // retVal=scanner.createScannedNode(this,url,reader,tagLine); - // reader.setPreviousOpenScanner(save); - // } - // - // if (!found) return this; - // else { - // return retVal; - // } - // } - // catch (Exception e) { - // String errorMsg; - // if (tagContents!=null) errorMsg = tagContents.toString(); else errorMsg="null"; - // throw new ParserException("Tag.scan() : Error while scanning tag, tag contents = "+errorMsg+", tagLine = "+tagLine,e); - // } - // } - - /** * Sets the attributes. * @param attributes The attribute collection to set. --- 232,235 ---- *************** *** 423,439 **** } } - public void setThisScanner(TagScanner scanner) - { - thisScanner = scanner; - } ! public String toPlainTextString() { return EMPTY_STRING; } /** ! * A call to a tag's toHTML() method will render it in HTML ! * Most tags that do not have children and inherit from Tag, ! * do not need to override toHTML(). * @see org.htmlparser.Node#toHtml() */ --- 322,332 ---- } } ! public String toPlainTextString() { return EMPTY_STRING; } /** ! * A call to a tag's toHTML() method will render it in HTML. * @see org.htmlparser.Node#toHtml() */ *************** *** 487,498 **** /** - * Sets the tagParser. - * @param tagParser The tagParser to set - */ - public static void setTagParser(TagParser tagParser) { - //todo: fix this Tag.tagParser = tagParser; - } - - /** * Determines if the given tag breaks the flow of text. * @return <code>true</code> if following text would start on a new line, --- 380,383 ---- *************** *** 501,505 **** public boolean breaksFlow () { ! 
return (breakTags.contains (getText ().toUpperCase ())); } --- 386,390 ---- public boolean breaksFlow () { ! return (breakTags.containsKey (getText ().toUpperCase ())); } *************** *** 511,517 **** * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { ! if (thisScanner!=null && thisScanner.getFilter()==filter) ! collectionList.add(this); } --- 396,401 ---- * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) ! { } *************** *** 541,546 **** } ! public void accept(NodeVisitor visitor) { ! // todo: fix this visitor.visitTag(this); } --- 425,429 ---- } ! public void accept(Object visitor) { } |