[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper AttributeParser.java,1.38,1.39 Composite
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/parserHelper Modified Files: AttributeParser.java CompositeTagScannerHelper.java ParserHelper.java ScriptScannerHelper.java StringParser.java TagParser.java Log Message: Add style checking target to ant build script: ant checkstyle. It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b}, { } }. But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AttributeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** AttributeParser.java 8 Sep 2003 02:26:29 -0000 1.38 --- AttributeParser.java 10 Sep 2003 03:38:18 -0000 1.39 *************** *** 233,237 **** return delim.indexOf(tokenAccumulator)>=0; } ! 
private boolean isCurrentTokenSingleQuote() { return currentToken.charAt(0)==SINGLE_QUOTE; --- 233,237 ---- return delim.indexOf(tokenAccumulator)>=0; } ! private boolean isCurrentTokenSingleQuote() { return currentToken.charAt(0)==SINGLE_QUOTE; Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** CompositeTagScannerHelper.java 8 Sep 2003 02:26:29 -0000 1.45 --- CompositeTagScannerHelper.java 10 Sep 2003 03:38:18 -0000 1.46 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! 
// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 52,69 **** private int endingLineNumber; private boolean balance_quotes; ! public CompositeTagScannerHelper( CompositeTagScanner scanner, ! Tag tag, ! String url, NodeReader reader, String currLine, boolean balance_quotes) { ! this.scanner = scanner; this.tag = tag; this.url = url; this.reader = reader; ! this.currLine = currLine; this.endTag = null; this.nodeList = new NodeList(); --- 52,69 ---- private int endingLineNumber; private boolean balance_quotes; ! public CompositeTagScannerHelper( CompositeTagScanner scanner, ! Tag tag, ! String url, NodeReader reader, String currLine, boolean balance_quotes) { ! this.scanner = scanner; this.tag = tag; this.url = url; this.reader = reader; ! this.currLine = currLine; this.endTag = null; this.nodeList = new NodeList(); *************** *** 79,95 **** scanner.beforeScanningStarts(); Node currentNode = tag; ! doEmptyXmlTagCheckOn(currentNode); ! 
if (!endTagFound) { do { currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) doForceCorrectionCheckOn((Tag)currentNode); ! doEmptyXmlTagCheckOn(currentNode); if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); } while (currentNode!=null && !endTagFound); --- 79,95 ---- scanner.beforeScanningStarts(); Node currentNode = tag; ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) { do { currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) doForceCorrectionCheckOn((Tag)currentNode); ! doEmptyXmlTagCheckOn(currentNode); if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); } while (currentNode!=null && !endTagFound); *************** *** 98,102 **** createCorrectionEndTagBefore(reader.getLastReadPosition()+1); } ! this.endingLineNumber = reader.getLastLineNumber(); return createTag(); --- 98,102 ---- createCorrectionEndTagBefore(reader.getLastReadPosition()+1); } ! this.endingLineNumber = reader.getLastLineNumber(); return createTag(); *************** *** 117,121 **** String endTagName = tag.getTagName(); int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; endTag = new EndTag( new TagData( --- 117,121 ---- String endTagName = tag.getTagName(); int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; endTag = new EndTag( new TagData( *************** *** 127,135 **** ); } ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { String endTagName = tag.getTagName(); int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; possibleEndTagCauser.setTagBegin(endTagEnd+1); reader.addNextParsedNode(possibleEndTagCauser); --- 127,135 ---- ); } ! 
private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { String endTagName = tag.getTagName(); int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; possibleEndTagCauser.setTagBegin(endTagEnd+1); reader.addNextParsedNode(possibleEndTagCauser); *************** *** 145,149 **** private Tag createTag() throws ParserException { ! CompositeTag newTag = (CompositeTag) scanner.createTag( --- 145,149 ---- private Tag createTag() throws ParserException { ! CompositeTag newTag = (CompositeTag) scanner.createTag( *************** *** 191,196 **** if (isXmlEndTag(tag)) { endTag = possibleEndTag; ! endTagFound = true; ! } } } --- 191,196 ---- if (isXmlEndTag(tag)) { endTag = possibleEndTag; ! endTagFound = true; ! } } } *************** *** 200,210 **** createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; } } private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || isSelfChildTagRecievedIncorrectly(possibleEndTag); } --- 200,210 ---- createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; } } private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || isSelfChildTagRecievedIncorrectly(possibleEndTag); } *************** *** 213,221 **** return ( !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && possibleEndTag.getTagName().equals(tag.getTagName()) ); } ! public boolean isXmlEndTag(Tag tag) { String tagText = tag.getText(); --- 213,221 ---- return ( !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && possibleEndTag.getTagName().equals(tag.getTagName()) ); } ! 
public boolean isXmlEndTag(Tag tag) { String tagText = tag.getText(); Index: ParserHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ParserHelper.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** ParserHelper.java 8 Sep 2003 02:26:29 -0000 1.15 --- ParserHelper.java 10 Sep 2003 03:38:18 -0000 1.16 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 57,61 **** { URLConnection ret; ! try { --- 57,61 ---- { URLConnection ret; ! try { *************** *** 70,74 **** throw ex; } ! return (ret); } --- 70,74 ---- throw ex; } ! return (ret); } *************** *** 93,97 **** StringBuffer buffer; URLConnection ret; ! try { --- 93,97 ---- StringBuffer buffer; URLConnection ret; ! 
try { *************** *** 132,136 **** } } ! return (ret); } --- 132,136 ---- } } ! return (ret); } *************** *** 147,151 **** { String ret; ! try { --- 147,151 ---- { String ret; ! try { *************** *** 153,157 **** java.lang.reflect.Method method; Object object; ! cls = Class.forName ("java.nio.charset.Charset"); method = cls.getMethod ("forName", new Class[] { String.class }); --- 153,157 ---- java.lang.reflect.Method method; Object object; ! cls = Class.forName ("java.nio.charset.Charset"); method = cls.getMethod ("forName", new Class[] { String.class }); *************** *** 183,187 **** ret = _default; } ! return (ret); } --- 183,187 ---- ret = _default; } ! return (ret); } Index: ScriptScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ScriptScannerHelper.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** ScriptScannerHelper.java 8 Sep 2003 02:26:29 -0000 1.10 --- ScriptScannerHelper.java 10 Sep 2003 03:38:18 -0000 1.11 *************** *** 11,15 **** //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. ! // //You should have received a copy of the GNU Lesser General Public //License along with this library; if not, write to the Free Software --- 11,15 ---- //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. ! // //You should have received a copy of the GNU Lesser General Public //License along with this library; if not, write to the Free Software *************** *** 18,27 **** //For any questions or suggestions, you can write to me at : //Email :so...@in... ! // ! //Postal Address : //Somik Raha //Extreme Programmer & Coach //Industrial Logic, Inc. ! 
//2583 Cedar Street, Berkeley, //CA 94708, USA //Website : http://www.industriallogic.com --- 18,27 ---- //For any questions or suggestions, you can write to me at : //Email :so...@in... ! // ! //Postal Address : //Somik Raha //Extreme Programmer & Coach //Industrial Logic, Inc. ! //2583 Cedar Street, Berkeley, //CA 94708, USA //Website : http://www.industriallogic.com *************** *** 50,54 **** private String url; private String currLine; ! public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { this.reader = nodeReader; --- 50,54 ---- private String url; private String currLine; ! public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { this.reader = nodeReader; *************** *** 68,72 **** return createScriptTagUsing(url, currLine, startLine); } ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { return scriptScanner.createTag( --- 68,72 ---- return createScriptTagUsing(url, currLine, startLine); } ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { return scriptScanner.createTag( *************** *** 102,106 **** String endTagName = tag.getTagName(); int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; endTag = new EndTag( new TagData( --- 102,106 ---- String endTagName = tag.getTagName(); int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; endTag = new EndTag( new TagData( *************** *** 121,125 **** scriptContents = new StringBuffer(); endTagFound = false; ! endTag = null; line = currLine; --- 121,125 ---- scriptContents = new StringBuffer(); endTagFound = false; ! endTag = null; line = currLine; *************** *** 132,136 **** startingPos = 0; } ! if (sameLine) sameLine = false; } --- 132,136 ---- startingPos = 0; } ! 
if (sameLine) sameLine = false; } *************** *** 141,145 **** endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); findStartingAndEndingLocations(line); ! if (endTagLoc!=-1) { extractEndTagFrom(line); --- 141,145 ---- endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); findStartingAndEndingLocations(line); ! if (endTagLoc!=-1) { extractEndTagFrom(line); *************** *** 150,154 **** private void continueParsing(String line) { ! if (sameLine) scriptContents.append( line.substring( --- 150,154 ---- private void continueParsing(String line) { ! if (sameLine) scriptContents.append( line.substring( *************** *** 165,169 **** endTagFound = true; endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) scriptContents.append( getCodeBetweenStartAndEndTags( --- 165,169 ---- endTagFound = true; endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) scriptContents.append( getCodeBetweenStartAndEndTags( *************** *** 176,180 **** scriptContents.append(line.substring(0,endTagLoc)); } ! reader.setPosInLine(endTag.elementEnd()); } --- 176,180 ---- scriptContents.append(line.substring(0,endTagLoc)); } ! reader.setPosInLine(endTag.elementEnd()); } *************** *** 183,187 **** while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); } } --- 183,187 ---- while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); } } *************** *** 192,196 **** int endTagLoc) throws ParserException { try { ! return line.substring( startTag.elementEnd()+1, --- 192,196 ---- int endTagLoc) throws ParserException { try { ! 
return line.substring( startTag.elementEnd()+1, *************** *** 208,213 **** private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! line.charAt(endTagLoc+scriptScanner.getEndTag().length()); return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; } --- 208,213 ---- private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! line.charAt(endTagLoc+scriptScanner.getEndTag().length()); return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; } Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** StringParser.java 8 Sep 2003 02:26:29 -0000 1.38 --- StringParser.java 10 Sep 2003 03:38:18 -0000 1.39 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 34,40 **** public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; private final static int PARSE_IGNORE_STATE=3; --- 34,40 ---- public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; private final static int PARSE_IGNORE_STATE=3; *************** *** 50,56 **** char ch; boolean ret; ! ret = false; ! if (pos + 2 <= line.length ()) if ('<' == line.charAt (pos)) --- 50,56 ---- char ch; boolean ret; ! ret = false; ! if (pos + 2 <= line.length ()) if ('<' == line.charAt (pos)) *************** *** 72,76 **** * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ public Node find(NodeReader reader,String input,int position, boolean balance_quotes) { --- 72,76 ---- * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ public Node find(NodeReader reader,String input,int position, boolean balance_quotes) { *************** *** 105,109 **** state = PARSE_IGNORE_STATE; } ! } if (state==BEFORE_PARSE_BEGINS_STATE) { --- 105,109 ---- state = PARSE_IGNORE_STATE; } ! } if (state==BEFORE_PARSE_BEGINS_STATE) { *************** *** 113,117 **** { textBuffer.append(input.charAt(i)); ! } // Patch by Cedric Rosa if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) --- 113,117 ---- { textBuffer.append(input.charAt(i)); ! 
} // Patch by Cedric Rosa if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) *************** *** 125,133 **** } while (input!=null && input.length()==0); ! if (input==null) { textEnd=i; state =PARSE_COMPLETED_STATE; ! } else { textBuffer.append(Parser.getLineSeparator()); --- 125,133 ---- } while (input!=null && input.length()==0); ! if (input==null) { textEnd=i; state =PARSE_COMPLETED_STATE; ! } else { textBuffer.append(Parser.getLineSeparator()); Index: TagParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/TagParser.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** TagParser.java 8 Sep 2003 02:26:29 -0000 1.41 --- TagParser.java 10 Sep 2003 03:38:18 -0000 1.42 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,51 **** public final static int TAG_FINISHED_PARSING_STATE=1<<3; public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! private ParserFeedback feedback; --- 42,51 ---- public final static int TAG_FINISHED_PARSING_STATE=1<<3; public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! private ParserFeedback feedback; *************** *** 62,67 **** Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && state!=TAG_ILLEGAL_STATE ) --- 62,67 ---- Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && state!=TAG_ILLEGAL_STATE ) *************** *** 106,111 **** private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || state==TAG_ILLEGAL_STATE)) { --- 106,111 ---- private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || state==TAG_ILLEGAL_STATE)) { *************** *** 123,127 **** return openTagPos > closeTagPos || (openTagPos ==-1 && closeTagPos!=-1); } ! 
private int checkFinishedState(Bool encounteredQuery, int i, int state, char ch, Tag tag, int pos) { if (ch=='>') --- 123,127 ---- return openTagPos > closeTagPos || (openTagPos ==-1 && closeTagPos!=-1); } ! private int checkFinishedState(Bool encounteredQuery, int i, int state, char ch, Tag tag, int pos) { if (ch=='>') *************** *** 142,146 **** // or it is a mistake in the html, in which case we need to correct it *sigh* if (isWellFormedTag(tag,pos)) return state; ! state = TAG_FINISHED_PARSING_STATE; tag.setTagEnd(i); --- 142,146 ---- // or it is a mistake in the html, in which case we need to correct it *sigh* if (isWellFormedTag(tag,pos)) return state; ! state = TAG_FINISHED_PARSING_STATE; tag.setTagEnd(i); *************** *** 149,153 **** // Remove all inverted commas. correctTag(tag); ! StringBuffer msg = new StringBuffer(); msg.append("HTMLTagParser : Encountered > inside inverted commas in line \n"); --- 149,153 ---- // Remove all inverted commas. correctTag(tag); ! StringBuffer msg = new StringBuffer(); msg.append("HTMLTagParser : Encountered > inside inverted commas in line \n"); *************** *** 162,167 **** } } else ! if (ch=='<' && ! state==TAG_BEGIN_PARSING_STATE && tag.getText().charAt(0)!='%' ) { --- 162,167 ---- } } else ! if (ch=='<' && ! state==TAG_BEGIN_PARSING_STATE && tag.getText().charAt(0)!='%' ) { *************** *** 173,180 **** private void checkIfAppendable(Bool encounteredQuery,int state, char ch, Tag tag) { ! if (state==TAG_IGNORE_DATA_STATE || ! state==TAG_BEGIN_PARSING_STATE || state==TAG_IGNORE_BEGIN_TAG_STATE) { ! if (ch=='?') encounteredQuery.setBoolean(true); tag.append(ch); --- 173,180 ---- private void checkIfAppendable(Bool encounteredQuery,int state, char ch, Tag tag) { ! if (state==TAG_IGNORE_DATA_STATE || ! state==TAG_BEGIN_PARSING_STATE || state==TAG_IGNORE_BEGIN_TAG_STATE) { ! 
if (ch=='?') encounteredQuery.setBoolean(true); tag.append(ch); *************** *** 183,188 **** private int checkIllegalState(int i, int state, char ch, Tag tag) { ! if (ch=='/' && i>0 && tag.getTagLine().charAt(i-1)=='<' && ! state!=TAG_IGNORE_DATA_STATE && state!=TAG_IGNORE_BEGIN_TAG_STATE) { --- 183,188 ---- private int checkIllegalState(int i, int state, char ch, Tag tag) { ! if (ch=='/' && i>0 && tag.getTagLine().charAt(i-1)=='<' && ! state!=TAG_IGNORE_DATA_STATE && state!=TAG_IGNORE_BEGIN_TAG_STATE) { *************** *** 192,196 **** return state; } ! public void correctTag(Tag tag) { String tempText = tag.getText(); --- 192,196 ---- return state; } ! public void correctTag(Tag tag) { String tempText = tag.getText(); *************** *** 205,209 **** StringBuffer result = insertInvertedCommasCorrectly(absorbedText); tag.setText(result.toString()); ! } public StringBuffer insertInvertedCommasCorrectly(StringBuffer absorbedText) { StringBuffer result = new StringBuffer(); --- 205,209 ---- StringBuffer result = insertInvertedCommasCorrectly(absorbedText); tag.setText(result.toString()); ! } public StringBuffer insertInvertedCommasCorrectly(StringBuffer absorbedText) { StringBuffer result = new StringBuffer(); *************** *** 225,229 **** } return result; ! } public static String pruneSpaces(String token) { int firstSpace; --- 225,229 ---- } return result; ! } public static String pruneSpaces(String token) { int firstSpace; *************** *** 238,244 **** token = token.substring(0,token.length()-1); lastSpace = token.lastIndexOf(' '); ! } return token; ! } /** --- 238,244 ---- token = token.substring(0,token.length()-1); lastSpace = token.lastIndexOf(' '); ! } return token; ! } /** *************** *** 264,276 **** return (state); ! } ! public int incrementCounter(int i, NodeReader reader, int state, Tag tag) { String nextLine = null; if ( ! (state==TAG_BEGIN_PARSING_STATE || state==TAG_IGNORE_DATA_STATE || state==TAG_IGNORE_BEGIN_TAG_STATE ! 
) && i==tag.getTagLine().length()-1) { --- 264,276 ---- return (state); ! } ! public int incrementCounter(int i, NodeReader reader, int state, Tag tag) { String nextLine = null; if ( ! (state==TAG_BEGIN_PARSING_STATE || state==TAG_IGNORE_DATA_STATE || state==TAG_IGNORE_BEGIN_TAG_STATE ! ) && i==tag.getTagLine().length()-1) { *************** *** 291,295 **** tag.append(Parser.getLineSeparator()); } ! // Ensure blank lines are included in tag's 'tagLines' while (--numLinesAdvanced > 0) --- 291,295 ---- tag.append(Parser.getLineSeparator()); } ! // Ensure blank lines are included in tag's 'tagLines' while (--numLinesAdvanced > 0) *************** *** 299,317 **** tag.setTagLine(nextLine); i=-1; ! } return ++i; ! } // Class provided for thread safety in TagParser class Bool { private boolean boolValue; ! Bool(boolean boolValue) { ! this.boolValue = boolValue; } ! public void setBoolean(boolean boolValue) { this.boolValue = boolValue; } ! public boolean getBoolean() { return boolValue; --- 299,317 ---- tag.setTagLine(nextLine); i=-1; ! } return ++i; ! } // Class provided for thread safety in TagParser class Bool { private boolean boolValue; ! Bool(boolean boolValue) { ! this.boolValue = boolValue; } ! public void setBoolean(boolean boolValue) { this.boolValue = boolValue; } ! public boolean getBoolean() { return boolValue; |