Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper AttributeParser.java,1.36,1.37 Composite
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv31228/parserHelper Modified Files: AttributeParser.java CompositeTagScannerHelper.java ParserHelper.java ScriptScannerHelper.java StringParser.java TagParser.java Log Message: Change tabs to spaces in all source files. Index: AttributeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** AttributeParser.java 24 Aug 2003 21:59:42 -0000 1.36 --- AttributeParser.java 3 Sep 2003 23:36:19 -0000 1.37 *************** *** 52,87 **** ! private Hashtable attributeTable; ! private String element; ! private String name; ! private String value; ! private String part; ! private String empty; ! private boolean equal; ! private StringTokenizer tokenizer; ! private boolean doubleQuote; ! private boolean singleQuote; ! private boolean ready; ! private String currentToken; ! private String tokenAccumulator; ! /** ! * Method to break the tag into pieces. ! * @param text All the text within the tag inside < and >. * @return A Hastable with elements containing the ! * pieces of the tag. The tag-name has the value field set to ! * the constant Tag.TAGNAME. In addition the tag-name is ! * stored into the Hashtable with the name Tag.TAGNAME ! * where the value is the name of the tag. ! * Tag parameters without value ! * has the value "". Parameters with value are represented ! * in the Hastable by a name/value pair. ! * As html is case insensitive but Hastable is not are all ! * names converted into UPPERCASE to the Hastable ! * E.g extract the href values from A-tag's and print them ! * <pre> ! * * Tag tag; ! * Hashtable h; ! * String tmp; * try { * NodeReader in = new NodeReader(new FileReader(path),2048); --- 52,87 ---- ! private Hashtable attributeTable; ! private String element; ! private String name; ! private String value; ! private String part; ! private String empty; ! private boolean equal; ! private StringTokenizer tokenizer; ! private boolean doubleQuote; ! private boolean singleQuote; ! private boolean ready; ! private String currentToken; ! private String tokenAccumulator; ! /** ! * Method to break the tag into pieces. ! * @param text All the text within the tag inside < and >. * @return A Hastable with elements containing the ! * pieces of the tag. The tag-name has the value field set to ! * the constant Tag.TAGNAME. In addition the tag-name is ! * stored into the Hashtable with the name Tag.TAGNAME ! * where the value is the name of the tag. ! * Tag parameters without value ! * has the value "". Parameters with value are represented ! * in the Hastable by a name/value pair. ! * As html is case insensitive but Hastable is not are all ! * names converted into UPPERCASE to the Hastable ! * E.g extract the href values from A-tag's and print them ! * <pre> ! * * Tag tag; ! * Hashtable h; ! * String tmp; * try { * NodeReader in = new NodeReader(new FileReader(path),2048); *************** *** 102,118 **** * ie.printStackTrace(); * } ! * </pre> ! * ! */ public Hashtable parseAttributes (String text) { ! attributeTable = new SpecialHashtable(); ! part = null; ! empty = null; name=null; value=null; element=null; ! equal = false; delim=DELIMETERS; ! tokenizer = new StringTokenizer(text,delim,true); while (true) { part=getNextPartUsing(delim); --- 102,118 ---- * ie.printStackTrace(); * } ! * </pre> ! * ! */ public Hashtable parseAttributes (String text) { ! attributeTable = new SpecialHashtable(); ! part = null; ! empty = null; name=null; value=null; element=null; ! equal = false; delim=DELIMETERS; ! tokenizer = new StringTokenizer(text,delim,true); while (true) { part=getNextPartUsing(delim); *************** *** 127,131 **** } else { ! processInvalidPart(); if (!tokenizer.hasMoreTokens ()) break; --- 127,131 ---- } else { ! processInvalidPart(); if (!tokenizer.hasMoreTokens ()) break; *************** *** 160,201 **** } ! private boolean isValid(String part) { ! return part != null && (0 < part.length ()); ! } ! private void process(String part) { ! if (name == null) { ! if (!part.substring(0,1).equals(" ")) { ! name = part; ! equal=true; ! } ! } ! else { ! if (equal){ ! if (part.equals("=")) { ! equal=false; ! delim=DELIMETERS_WITHOUT_EQUALS; value=Tag.NOTHING; ! } ! else { ! putDataIntoTable(attributeTable,name,Tag.NULLVALUE,false); ! name=part; ! value=null; ! } ! } ! if (!equal && !part.equals("=")) { ! value=part; ! putDataIntoTable(attributeTable,name,value,false); ! name=null; ! value=null; ! } ! } ! } private String getNextPartUsing(String delimiter) { ! tokenAccumulator = null; ! doubleQuote = false; ! singleQuote = false; ! ready = false; while (ready == false && tokenizer.hasMoreTokens()) { currentToken = tokenizer.nextToken(delimiter); --- 160,201 ---- } ! private boolean isValid(String part) { ! return part != null && (0 < part.length ()); ! } ! private void process(String part) { ! if (name == null) { ! if (!part.substring(0,1).equals(" ")) { ! name = part; ! equal=true; ! } ! } ! else { ! if (equal){ ! if (part.equals("=")) { ! equal=false; ! delim=DELIMETERS_WITHOUT_EQUALS; value=Tag.NOTHING; ! } ! else { ! putDataIntoTable(attributeTable,name,Tag.NULLVALUE,false); ! name=part; ! value=null; ! } ! } ! if (!equal && !part.equals("=")) { ! value=part; ! putDataIntoTable(attributeTable,name,value,false); ! name=null; ! value=null; ! } ! } ! } private String getNextPartUsing(String delimiter) { ! tokenAccumulator = null; ! doubleQuote = false; ! singleQuote = false; ! ready = false; while (ready == false && tokenizer.hasMoreTokens()) { currentToken = tokenizer.nextToken(delimiter); *************** *** 210,214 **** tokenAccumulator=""; } else { ! tokenAccumulator = currentToken; ready = isReadyWithNextPart(currentToken); } --- 210,214 ---- tokenAccumulator=""; } else { ! tokenAccumulator = currentToken; ready = isReadyWithNextPart(currentToken); } *************** *** 217,256 **** } ! private boolean isReadyWithNextPart(String currentToken) { ! boolean ready = false; ! if (isDelimeter(currentToken)) { ! if (currentToken.equals("=")){ ! ready=true; ! } ! } ! else { ! ready=true; ! } ! return ready; ! } ! private boolean isDelimeter(String token) { ! return delim.indexOf(tokenAccumulator)>=0; ! } ! ! private boolean isCurrentTokenSingleQuote() { ! return currentToken.charAt(0)==SINGLE_QUOTE; ! } ! private boolean isCurrentTokenDoubleQuote() { ! return currentToken.charAt(0)==DOUBLE_QUOTE; ! } ! private void combineTokensInsideSingleOrDoubleQuotes() { ! if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){ ! doubleQuote= false; ! ready=true; ! } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) { ! singleQuote=false; ! ready=true; ! }else { ! tokenAccumulator += currentToken; ! } ! } --- 217,256 ---- } ! private boolean isReadyWithNextPart(String currentToken) { ! boolean ready = false; ! if (isDelimeter(currentToken)) { ! if (currentToken.equals("=")){ ! ready=true; ! } ! } ! else { ! ready=true; ! } ! return ready; ! } ! private boolean isDelimeter(String token) { ! return delim.indexOf(tokenAccumulator)>=0; ! } ! ! private boolean isCurrentTokenSingleQuote() { ! return currentToken.charAt(0)==SINGLE_QUOTE; ! } ! private boolean isCurrentTokenDoubleQuote() { ! return currentToken.charAt(0)==DOUBLE_QUOTE; ! } ! private void combineTokensInsideSingleOrDoubleQuotes() { ! if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){ ! doubleQuote= false; ! ready=true; ! } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) { ! singleQuote=false; ! ready=true; ! }else { ! tokenAccumulator += currentToken; ! } ! } Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** CompositeTagScannerHelper.java 24 Aug 2003 21:59:42 -0000 1.43 --- CompositeTagScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.44 *************** *** 41,225 **** public class CompositeTagScannerHelper { ! private CompositeTagScanner scanner; ! private Tag tag; ! private String url; ! private NodeReader reader; ! private String currLine; ! private Tag endTag; ! private NodeList nodeList; ! private boolean endTagFound; ! private int startingLineNumber; ! private int endingLineNumber; ! private boolean balance_quotes; ! ! public CompositeTagScannerHelper( ! CompositeTagScanner scanner, ! Tag tag, ! String url, ! NodeReader reader, ! String currLine, boolean balance_quotes) { ! ! this.scanner = scanner; ! this.tag = tag; ! this.url = url; ! this.reader = reader; ! this.currLine = currLine; ! this.endTag = null; ! this.nodeList = new NodeList(); ! this.endTagFound = false; this.balance_quotes = balance_quotes; ! } ! public Tag scan() throws ParserException { ! this.startingLineNumber = reader.getLastLineNumber(); ! if (shouldCreateEndTagAndExit()) { ! return createEndTagAndRepositionReader(); ! } ! scanner.beforeScanningStarts(); ! Node currentNode = tag; ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) { ! do { ! currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; ! currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) ! doForceCorrectionCheckOn((Tag)currentNode); ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); ! } ! while (currentNode!=null && !endTagFound); ! } ! if (endTag==null) { ! createCorrectionEndTagBefore(reader.getLastReadPosition()+1); ! } ! ! this.endingLineNumber = reader.getLastLineNumber(); ! return createTag(); ! } ! private boolean shouldCreateEndTagAndExit() { ! return scanner.shouldCreateEndTagAndExit(); ! } ! private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore(tag.elementBegin()); ! reader.setPosInLine(tag.elementBegin()); ! reader.setDontReadNextLine(true); ! return endTag; ! } ! private void createCorrectionEndTagBefore(int pos) { ! String endTagName = tag.getTagName(); ! int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! String endTagName = tag.getTagName(); ! int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! possibleEndTagCauser.setTagBegin(endTagEnd+1); ! reader.addNextParsedNode(possibleEndTagCauser); ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private Tag createTag() throws ParserException { ! CompositeTag newTag = ! (CompositeTag) ! scanner.createTag( ! new TagData( ! tag.elementBegin(), ! endTag.elementEnd(), ! startingLineNumber, ! endingLineNumber, ! tag.getText(), ! currLine, ! url, ! tag.isEmptyXmlTag() ! ), ! new CompositeTagData( ! tag,endTag,nodeList ! ) ! ); ! for (int i=0;i<newTag.getChildCount();i++) { ! Node child = newTag.childAt(i); ! child.setParent(newTag); ! } ! return newTag; ! } ! private void doChildAndEndTagCheckOn(Node currentNode) { ! if (currentNode instanceof EndTag) { ! EndTag possibleEndTag = (EndTag)currentNode; ! if (isExpectedEndTag(possibleEndTag)) { ! endTagFound = true; ! endTag = possibleEndTag; ! return; ! } ! } ! nodeList.add(currentNode); ! scanner.childNodeEncountered(currentNode); ! } ! private boolean isExpectedEndTag(EndTag possibleEndTag) { ! return possibleEndTag.getTagName().equals(tag.getTagName()); ! } ! private void doEmptyXmlTagCheckOn(Node currentNode) { ! if (currentNode instanceof Tag) { ! Tag possibleEndTag = (Tag)currentNode; ! if (isXmlEndTag(tag)) { ! endTag = possibleEndTag; ! endTagFound = true; ! } ! } ! } ! private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) { ! if (isEndTagMissing(possibleEndTagCauser)) { ! createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; ! } ! } ! private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || ! isSelfChildTagRecievedIncorrectly(possibleEndTag); ! } ! private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) { ! return ( ! !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(tag.getTagName()) ! ); ! } ! ! public boolean isXmlEndTag(Tag tag) { ! String tagText = tag.getText(); ! int lastSlash = tagText.lastIndexOf("/"); ! return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! } } --- 41,225 ---- public class CompositeTagScannerHelper { ! private CompositeTagScanner scanner; ! private Tag tag; ! private String url; ! private NodeReader reader; ! private String currLine; ! private Tag endTag; ! private NodeList nodeList; ! private boolean endTagFound; ! private int startingLineNumber; ! private int endingLineNumber; ! private boolean balance_quotes; ! ! public CompositeTagScannerHelper( ! CompositeTagScanner scanner, ! Tag tag, ! String url, ! NodeReader reader, ! String currLine, boolean balance_quotes) { ! ! this.scanner = scanner; ! this.tag = tag; ! this.url = url; ! this.reader = reader; ! this.currLine = currLine; ! this.endTag = null; ! this.nodeList = new NodeList(); ! this.endTagFound = false; this.balance_quotes = balance_quotes; ! } ! public Tag scan() throws ParserException { ! this.startingLineNumber = reader.getLastLineNumber(); ! if (shouldCreateEndTagAndExit()) { ! return createEndTagAndRepositionReader(); ! } ! scanner.beforeScanningStarts(); ! Node currentNode = tag; ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) { ! do { ! currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; ! currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) ! doForceCorrectionCheckOn((Tag)currentNode); ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); ! } ! while (currentNode!=null && !endTagFound); ! } ! if (endTag==null) { ! createCorrectionEndTagBefore(reader.getLastReadPosition()+1); ! } ! ! this.endingLineNumber = reader.getLastLineNumber(); ! return createTag(); ! } ! private boolean shouldCreateEndTagAndExit() { ! return scanner.shouldCreateEndTagAndExit(); ! } ! private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore(tag.elementBegin()); ! reader.setPosInLine(tag.elementBegin()); ! reader.setDontReadNextLine(true); ! return endTag; ! } ! private void createCorrectionEndTagBefore(int pos) { ! String endTagName = tag.getTagName(); ! int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! String endTagName = tag.getTagName(); ! int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! possibleEndTagCauser.setTagBegin(endTagEnd+1); ! reader.addNextParsedNode(possibleEndTagCauser); ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private Tag createTag() throws ParserException { ! CompositeTag newTag = ! (CompositeTag) ! scanner.createTag( ! new TagData( ! tag.elementBegin(), ! endTag.elementEnd(), ! startingLineNumber, ! endingLineNumber, ! tag.getText(), ! currLine, ! url, ! tag.isEmptyXmlTag() ! ), ! new CompositeTagData( ! tag,endTag,nodeList ! ) ! ); ! for (int i=0;i<newTag.getChildCount();i++) { ! Node child = newTag.childAt(i); ! child.setParent(newTag); ! } ! return newTag; ! } ! private void doChildAndEndTagCheckOn(Node currentNode) { ! if (currentNode instanceof EndTag) { ! EndTag possibleEndTag = (EndTag)currentNode; ! if (isExpectedEndTag(possibleEndTag)) { ! endTagFound = true; ! endTag = possibleEndTag; ! return; ! } ! } ! nodeList.add(currentNode); ! scanner.childNodeEncountered(currentNode); ! } ! private boolean isExpectedEndTag(EndTag possibleEndTag) { ! return possibleEndTag.getTagName().equals(tag.getTagName()); ! } ! private void doEmptyXmlTagCheckOn(Node currentNode) { ! if (currentNode instanceof Tag) { ! Tag possibleEndTag = (Tag)currentNode; ! if (isXmlEndTag(tag)) { ! endTag = possibleEndTag; ! endTagFound = true; ! } ! } ! } ! private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) { ! if (isEndTagMissing(possibleEndTagCauser)) { ! createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; ! } ! } ! private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || ! isSelfChildTagRecievedIncorrectly(possibleEndTag); ! } ! private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) { ! return ( ! !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(tag.getTagName()) ! ); ! } ! ! public boolean isXmlEndTag(Tag tag) { ! String tagText = tag.getText(); ! int lastSlash = tagText.lastIndexOf("/"); ! return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! } } Index: ParserHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ParserHelper.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** ParserHelper.java 24 Aug 2003 21:59:42 -0000 1.13 --- ParserHelper.java 3 Sep 2003 23:36:19 -0000 1.14 *************** *** 42,106 **** public class ParserHelper implements Serializable { ! public ParserHelper() { ! super(); ! } ! /** ! * Opens a connection using the given url. ! * @param url The url to open. ! * @param feedback The ibject to use for messages or <code>null</code>. ! * @exception ParserException if an i/o exception occurs accessing the url. ! */ ! public static URLConnection openConnection (URL url, ParserFeedback feedback) ! throws ! ParserException ! { ! URLConnection ret; ! ! try ! { ! ret = url.openConnection (); ! } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! ! return (ret); ! } ! /** ! * Opens a connection based on a given string. ! * The string is either a file, in which case <code>file://localhost</code> ! * is prepended to a canonical path derived from the string, or a url that ! * begins with one of the known protocol strings, i.e. <code>http://</code>. * Embedded spaces are silently converted to %20 sequences. ! * @param string The name of a file or a url. ! * @param feedback The object to use for messages or <code>null</code> for no feedback. ! * @exception ParserException if the string is not a valid url or file. ! */ ! public static URLConnection openConnection (String string, ParserFeedback feedback) ! throws ! ParserException ! { ! final String prefix = "file://localhost"; ! String resource; ! URL url; ! StringBuffer buffer; ! URLConnection ret; ! ! try ! { ! url = new URL (LinkProcessor.fixSpaces (string)); ! ret = ParserHelper.openConnection (url, feedback); ! } ! catch (MalformedURLException murle) ! { // try it as a file ! try ! { File file = new File (string); resource = file.getCanonicalPath (); --- 42,106 ---- public class ParserHelper implements Serializable { ! public ParserHelper() { ! super(); ! } ! /** ! * Opens a connection using the given url. ! * @param url The url to open. ! * @param feedback The ibject to use for messages or <code>null</code>. ! * @exception ParserException if an i/o exception occurs accessing the url. ! */ ! public static URLConnection openConnection (URL url, ParserFeedback feedback) ! throws ! ParserException ! { ! URLConnection ret; ! ! try ! { ! ret = url.openConnection (); ! } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! ! return (ret); ! } ! /** ! * Opens a connection based on a given string. ! * The string is either a file, in which case <code>file://localhost</code> ! * is prepended to a canonical path derived from the string, or a url that ! * begins with one of the known protocol strings, i.e. <code>http://</code>. * Embedded spaces are silently converted to %20 sequences. ! * @param string The name of a file or a url. ! * @param feedback The object to use for messages or <code>null</code> for no feedback. ! * @exception ParserException if the string is not a valid url or file. ! */ ! public static URLConnection openConnection (String string, ParserFeedback feedback) ! throws ! ParserException ! { ! final String prefix = "file://localhost"; ! String resource; ! URL url; ! StringBuffer buffer; ! URLConnection ret; ! ! try ! { ! url = new URL (LinkProcessor.fixSpaces (string)); ! ret = ParserHelper.openConnection (url, feedback); ! } ! catch (MalformedURLException murle) ! { // try it as a file ! try ! { File file = new File (string); resource = file.getCanonicalPath (); *************** *** 110,189 **** buffer.append ("/"); buffer.append (resource); ! url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); ! ret = ParserHelper.openConnection (url, feedback); ! if (null != feedback) ! feedback.info (url.toExternalForm ()); ! } ! catch (MalformedURLException murle2) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, murle2); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! catch (IOException ioe) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; } ! } ! ! return (ret); ! } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public static String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! java.lang.reflect.Method method; ! Object object; ! cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (java.lang.reflect.InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; ! } ! return (ret); ! } } --- 110,189 ---- buffer.append ("/"); buffer.append (resource); ! url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); ! ret = ParserHelper.openConnection (url, feedback); ! if (null != feedback) ! feedback.info (url.toExternalForm ()); ! } ! catch (MalformedURLException murle2) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, murle2); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! } ! ! return (ret); ! } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public static String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! java.lang.reflect.Method method; ! Object object; ! cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (java.lang.reflect.InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; ! } ! return (ret); ! } } Index: ScriptScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ScriptScannerHelper.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** ScriptScannerHelper.java 24 Aug 2003 21:59:42 -0000 1.8 --- ScriptScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.9 *************** *** 38,214 **** public class ScriptScannerHelper { ! private int endTagLoc; ! private Tag endTag; ! private Tag startTag; ! private int startingPos; ! private boolean sameLine; ! private boolean endTagFound; ! private NodeReader reader; ! private StringBuffer scriptContents; ! private ScriptScanner scriptScanner; ! private Tag tag; ! private String url; ! private String currLine; ! ! public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { ! this.reader = nodeReader; ! this.scriptScanner = scriptScanner; ! this.tag = tag; ! this.url = url; ! this.currLine = currLine; ! } ! public Tag scan() throws ParserException { ! int startLine = reader.getLastLineNumber(); ! startTag = tag; ! extractScriptTagFrom(currLine); ! if (isScriptEndTagNotFound()) { ! createScriptEndTag(tag, currLine); ! } ! return createScriptTagUsing(url, currLine, startLine); ! } ! ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { ! return scriptScanner.createTag( ! new TagData( ! startTag.elementBegin(), ! endTag.elementEnd(), ! startLine, ! reader.getLastLineNumber(), ! startTag.getText(), ! currLine, ! url, ! false ! ), new CompositeTagData( ! startTag,endTag,createChildrenNodeList() ! ) ! ); ! } ! private NodeList createChildrenNodeList() { ! NodeList childrenNodeList = new NodeList(); ! childrenNodeList.add( ! new StringNode( ! scriptContents, ! startTag.elementEnd(), ! endTag.elementBegin()-1 ! ) ! ); ! return childrenNodeList; ! } ! private void createScriptEndTag(Tag tag, String currLine) { ! // If end tag doesn't exist, create one ! String endTagName = tag.getTagName(); ! int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private boolean isScriptEndTagNotFound() { ! return endTag == null; ! } ! private void extractScriptTagFrom(String currLine) throws ParserException { ! String line = null; ! scriptContents = new StringBuffer(); ! endTagFound = false; ! ! endTag = null; ! line = currLine; ! sameLine = true; ! startingPos = startTag.elementEnd(); ! do { ! doExtractionOfScriptContentsFrom(line); ! if (!endTagFound) { ! line = reader.getNextLine(); ! startingPos = 0; ! } ! if (sameLine) ! sameLine = false; ! } ! while (line!=null && !endTagFound); ! } ! private void doExtractionOfScriptContentsFrom(String line) throws ParserException { ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); ! findStartingAndEndingLocations(line); ! ! if (endTagLoc!=-1) { ! extractEndTagFrom(line); ! } else { ! continueParsing(line); ! } ! } ! private void continueParsing(String line) { ! if (sameLine) ! scriptContents.append( ! line.substring( ! startTag.elementEnd()+1 ! ) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line); ! } ! } ! private void extractEndTagFrom(String line) throws ParserException { ! endTagFound = true; ! endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) ! scriptContents.append( ! getCodeBetweenStartAndEndTags( ! line, ! startTag, ! endTagLoc) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line.substring(0,endTagLoc)); ! } ! ! reader.setPosInLine(endTag.elementEnd()); ! } ! private void findStartingAndEndingLocations(String line) { ! while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { ! startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); ! } ! } ! public String getCodeBetweenStartAndEndTags( ! String line, ! Tag startTag, ! int endTagLoc) throws ParserException { ! try { ! ! return line.substring( ! startTag.elementEnd()+1, ! endTagLoc ! ); ! } ! catch (Exception e) { ! StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n"); ! msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n"); ! msg.append("substring ends at: "+(endTagLoc)); ! throw new ParserException(msg.toString(),e); ! } ! } ! private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { ! if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! line.charAt(endTagLoc+scriptScanner.getEndTag().length()); ! return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; ! } } --- 38,214 ---- public class ScriptScannerHelper { ! private int endTagLoc; ! private Tag endTag; ! private Tag startTag; ! private int startingPos; ! private boolean sameLine; ! private boolean endTagFound; ! private NodeReader reader; ! private StringBuffer scriptContents; ! private ScriptScanner scriptScanner; ! private Tag tag; ! private String url; ! private String currLine; ! ! public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { ! this.reader = nodeReader; ! this.scriptScanner = scriptScanner; ! this.tag = tag; ! this.url = url; ! this.currLine = currLine; ! } ! public Tag scan() throws ParserException { ! int startLine = reader.getLastLineNumber(); ! startTag = tag; ! extractScriptTagFrom(currLine); ! if (isScriptEndTagNotFound()) { ! createScriptEndTag(tag, currLine); ! } ! return createScriptTagUsing(url, currLine, startLine); ! } ! ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { ! return scriptScanner.createTag( ! new TagData( ! startTag.elementBegin(), ! endTag.elementEnd(), ! startLine, ! reader.getLastLineNumber(), ! startTag.getText(), ! currLine, ! url, ! false ! ), new CompositeTagData( ! startTag,endTag,createChildrenNodeList() ! ) ! ); ! } ! private NodeList createChildrenNodeList() { ! NodeList childrenNodeList = new NodeList(); ! childrenNodeList.add( ! new StringNode( ! scriptContents, ! startTag.elementEnd(), ! endTag.elementBegin()-1 ! ) ! ); ! return childrenNodeList; ! } ! private void createScriptEndTag(Tag tag, String currLine) { ! // If end tag doesn't exist, create one ! String endTagName = tag.getTagName(); ! int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private boolean isScriptEndTagNotFound() { ! return endTag == null; ! } ! private void extractScriptTagFrom(String currLine) throws ParserException { ! String line = null; ! scriptContents = new StringBuffer(); ! endTagFound = false; ! ! endTag = null; ! line = currLine; ! sameLine = true; ! startingPos = startTag.elementEnd(); ! do { ! doExtractionOfScriptContentsFrom(line); ! if (!endTagFound) { ! line = reader.getNextLine(); ! startingPos = 0; ! } ! if (sameLine) ! sameLine = false; ! } ! while (line!=null && !endTagFound); ! } ! private void doExtractionOfScriptContentsFrom(String line) throws ParserException { ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); ! findStartingAndEndingLocations(line); ! ! if (endTagLoc!=-1) { ! extractEndTagFrom(line); ! } else { ! continueParsing(line); ! } ! } ! private void continueParsing(String line) { ! if (sameLine) ! scriptContents.append( ! line.substring( ! startTag.elementEnd()+1 ! ) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line); ! } ! } ! private void extractEndTagFrom(String line) throws ParserException { ! endTagFound = true; ! endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) ! scriptContents.append( ! getCodeBetweenStartAndEndTags( ! line, ! startTag, ! endTagLoc) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line.substring(0,endTagLoc)); ! } ! ! reader.setPosInLine(endTag.elementEnd()); ! } ! private void findStartingAndEndingLocations(String line) { ! while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { ! startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); ! } ! } ! public String getCodeBetweenStartAndEndTags( ! String line, ! Tag startTag, ! int endTagLoc) throws ParserException { ! try { ! ! return line.substring( ! startTag.elementEnd()+1, ! endTagLoc ! ); ! } ! catch (Exception e) { ! StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n"); ! msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n"); ! msg.append("substring ends at: "+(endTagLoc)); ! throw new ParserException(msg.toString(),e); ! } ! } ! private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { ! if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! line.charAt(endTagLoc+scriptScanner.getEndTag().length()); ! return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; ! } } Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** StringParser.java 24 Aug 2003 21:59:42 -0000 1.36 --- StringParser.java 3 Sep 2003 23:36:19 -0000 1.37 *************** *** 34,41 **** public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; ! private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; ! private final static int PARSE_IGNORE_STATE=3; /** --- 34,41 ---- public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; ! private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; ! private final static int PARSE_IGNORE_STATE=3; /** *************** *** 65,89 **** } ! /** ! * Locate the StringNode within the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! * @param position Position to start parsing from ! * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ ! public Node find(NodeReader reader,String input,int position, boolean balance_quotes) ! { ! StringBuffer textBuffer = new StringBuffer(); ! int state = BEFORE_PARSE_BEGINS_STATE; ! int textBegin=position; ! int textEnd=position; ! int inputLen = input.length(); ! char ch; char ignore_ender = '\"'; ! for (int i=position;(i<inputLen && state!=PARSE_COMPLETED_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='<' && state!=PARSE_IGNORE_STATE) { if (beginTag (input, i)) --- 65,89 ---- } ! /** ! * Locate the StringNode within the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! * @param position Position to start parsing from ! * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ ! public Node find(NodeReader reader,String input,int position, boolean balance_quotes) ! { ! StringBuffer textBuffer = new StringBuffer(); ! int state = BEFORE_PARSE_BEGINS_STATE; ! int textBegin=position; ! int textEnd=position; ! int inputLen = input.length(); ! char ch; char ignore_ender = '\"'; ! for (int i=position;(i<inputLen && state!=PARSE_COMPLETED_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='<' && state!=PARSE_IGNORE_STATE) { if (beginTag (input, i)) *************** *** 92,142 **** textEnd=i-1; } ! } ! if (balance_quotes && (ch=='\'' || ch=='"')) { ! if (state==PARSE_IGNORE_STATE) { if (ch == ignore_ender) state=PARSE_HAS_BEGUN_STATE; } ! else { ignore_ender = ch; state = PARSE_IGNORE_STATE; ! } ! } ! if (state==BEFORE_PARSE_BEGINS_STATE) ! { ! state=PARSE_HAS_BEGUN_STATE; ! } ! if (state==PARSE_HAS_BEGUN_STATE || state==PARSE_IGNORE_STATE) ! { ! textBuffer.append(input.charAt(i)); ! } ! // Patch by Cedric Rosa ! if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) ! state=PARSE_HAS_BEGUN_STATE; ! if (state==PARSE_HAS_BEGUN_STATE && i==inputLen-1) ! { ! do { ! input = reader.getNextLine(); ! if (input!=null && input.length()==0) ! textBuffer.append(Parser.getLineSeparator()); ! } ! while (input!=null && input.length()==0); ! ! if (input==null) { ! textEnd=i; ! state =PARSE_COMPLETED_STATE; ! ! } else { ! textBuffer.append(Parser.getLineSeparator()); ! inputLen = input.length(); ! i=-1; ! } ! } ! } ! return reader.getParser().getStringNodeFactory().createStringNode(textBuffer, textBegin, textEnd); ! } } --- 92,142 ---- textEnd=i-1; } ! } ! if (balance_quotes && (ch=='\'' || ch=='"')) { ! if (state==PARSE_IGNORE_STATE) { if (ch == ignore_ender) state=PARSE_HAS_BEGUN_STATE; } ! else { ignore_ender = ch; state = PARSE_IGNORE_STATE; ! } ! } ! if (state==BEFORE_PARSE_BEGINS_STATE) ! { ! state=PARSE_HAS_BEGUN_STATE; ! } ! if (state==PARSE_HAS_BEGUN_STATE || state==PARSE_IGNORE_STATE) ! { ! textBuffer.append(input.charAt(i)); ! } ! // Patch by Cedric Rosa ! if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) ! state=PARSE_HAS_BEGUN_STATE; ! if (state==PARSE_HAS_BEGUN_STATE && i==inputLen-1) ! { ! do { ! input = reader.getNextLine(); ! if (input!=null && input.length()==0) ! textBuffer.append(Parser.getLineSeparator()); ! } ! while (input!=null && input.length()==0); ! ! if (input==null) { ! textEnd=i; ! state =PARSE_COMPLETED_STATE; ! ! } else { ! textBuffer.append(Parser.getLineSeparator()); ! inputLen = input.length(); ! i=-1; ! } ! } ! } ! return reader.getParser().getStringNodeFactory().createStringNode(textBuffer, textBegin, textEnd); ! } } Index: TagParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/TagParser.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** TagParser.java 24 Aug 2003 21:59:42 -0000 1.39 --- TagParser.java 3 Sep 2003 23:36:19 -0000 1.40 *************** *** 38,244 **** public class TagParser { ! public final static int TAG_BEFORE_PARSING_STATE=1; ! public final static int TAG_BEGIN_PARSING_STATE=1<<2; ! public final static int TAG_FINISHED_PARSING_STATE=1<<3; ! public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; ! public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; ! public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! ! private ParserFeedback feedback; ! public TagParser(ParserFeedback feedback) { ! this.feedback = feedback; ! } ! public Tag find(NodeReader reader,String input,int position) { ! int state = TAG_BEFORE_PARSING_STATE; ! int i=position; ! char ch; ! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE ! Tag tag = new Tag(new TagData(position, 0, reader.getLastLineNumber(), 0, "", input, "", false)); ! Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && ! state!=TAG_ILLEGAL_STATE ! ) ! { ! ch = tag.getTagLine().charAt(i); ! state = automataInput(encounteredQuery, i, state, ch, tag, i, ignorechar); ! i = incrementCounter(i, reader, state, tag); ! } ! if (state==TAG_FINISHED_PARSING_STATE) { ! String tagLine = tag.getTagLine(); ! if (i>1 && tagLine.charAt(i-2)=='/') { ! tag.setEmptyXmlTag(true); ! String tagContents = tag.getText(); ! tag.setText(tagContents.substring(0,tagContents.length()-1)); ! } ! return tag; ! } else ! return null; ! } ! private int automataInput(Bool encounteredQuery, int i, int state,char ch, Tag tag, int pos, char[] ignorechar) { ! state = checkIllegalState(i, state, ch, tag); ! state = checkFinishedState(encounteredQuery, i, state, ch, tag, pos); ! state = toggleIgnoringState(state, ch, ignorechar); ! if (state==TAG_BEFORE_PARSING_STATE && ch!='<') { ! state= TAG_ILLEGAL_STATE; ! } ! if (state==TAG_IGNORE_DATA_STATE && ch=='<') { ! // If the next tag char is is close tag, then ! // this is legal, we should continue ! if (!isWellFormedTag(tag,pos)) ! state = TAG_IGNORE_BEGIN_TAG_STATE; ! } ! if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') { ! state = TAG_IGNORE_DATA_STATE; ! } ! checkIfAppendable(encounteredQuery, state, ch, tag); ! state = checkBeginParsingState(i, state, ch, tag); ! return state; ! } ! private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || ! state==TAG_ILLEGAL_STATE)) ! { ! // Transition from State 0 to State 1 - Record data till > is encountered ! tag.setTagBegin(i); ! state = TAG_BEGIN_PARSING_STATE; ! } ! return state; ! } ! private boolean isWellFormedTag(Tag tag, int pos) { ! String inputLine = tag.getTagLine(); ! int closeTagPos = inputLine.indexOf('>',pos+1); ! int openTagPos = inputLine.indexOf('<',pos+1); ! return openTagPos > closeTagPos || (openTagPos ==-1 && closeTagPos!=-1); ! } ! ! private int checkFinishedState(Bool encounteredQuery, int i, int state, char ch, Tag tag, int pos) { ! if (ch=='>') ! { ! if (state==TAG_BEGIN_PARSING_STATE) ! { ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i); ! } ! else ! if (state==TAG_IGNORE_DATA_STATE) { ! if (encounteredQuery.getBoolean()) { ! encounteredQuery.setBoolean(false); ! feedback.info(ENCOUNTERED_QUERY_MESSAGE); ! return state; ! } ! // Now, either this is a valid > input, and should be ignored, ! // or it is a mistake in the html, in which case we need to correct it *sigh* ! if (isWellFormedTag(tag,pos)) return state; ! ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i); ! // Do Correction ! // Correct the tag - assuming its grouped into name value pairs ! // Remove all inverted commas. ! correctTag(tag); ! ! StringBuffer msg = new StringBuffer(); ! msg.append("HTMLTagParser : Encountered > inside inverted commas in line \n"); ! msg.append(tag.getTagLine()); ! msg.append(", location "); ! msg.append(i); ! msg.append("\n"); ! for (int j=0;j<i;j++) msg.append(' '); ! msg.append('^'); ! msg.append("\nAutomatically corrected."); ! feedback.warning(msg.toString()); ! } ! } else ! if (ch=='<' && ! state==TAG_BEGIN_PARSING_STATE && ! tag.getText().charAt(0)!='%' ! ) { ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i-1);i--; ! } ! return state; ! } ! private void checkIfAppendable(Bool encounteredQuery,int state, char ch, Tag tag) { ! if (state==TAG_IGNORE_DATA_STATE || ! state==TAG_BEGIN_PARSING_STATE || ! state==TAG_IGNORE_BEGIN_TAG_STATE) { ! if (ch=='?') ! encounteredQuery.setBoolean(true); ! tag.append(ch); ! } ! } ! private int checkIllegalState(int i, int state, char ch, Tag tag) { ! if (ch=='/' && i>0 && tag.getTagLine().charAt(i-1)=='<' && ! state!=TAG_IGNORE_DATA_STATE && ! state!=TAG_IGNORE_BEGIN_TAG_STATE) ! { ! state = TAG_ILLEGAL_STATE; ! } ! return state; ! } ! ! public void correctTag(Tag tag) { ! String tempText = tag.getText(); ! StringBuffer absorbedText = new StringBuffer(); ! char c; ! for (int j=0;j<tempText.length();j++) { ! c = tempText.charAt(j); ! if (c!='"') ! absorbedText.append(c); ! } ! // Go into the next stage. ! StringBuffer result = insertInvertedCommasCorrectly(absorbedText); ! tag.setText(result.toString()); ! } ! public StringBuffer insertInvertedCommasCorrectly(StringBuffer absorbedText) { ! StringBuffer result = new StringBuffer(); ! StringTokenizer tok = new StringTokenizer(absorbedText.toString(),"=",false); ! String token; ! token= (String)tok.nextToken(); ! result.append(token+"="); ! for (;tok.hasMoreTokens();) { ! token= (String)tok.nextToken(); ! token = pruneSpaces(token); ! result.append('"'); ! int lastIndex = token.lastIndexOf(' '); ! if (lastIndex!=-1 && tok.hasMoreTokens()) { ! result.append(token.substring(0,lastIndex)); ! result.append('"'); ! result.append(token.substring(lastIndex,token.length())); ! } else result.append(token+'"'); ! if (tok.hasMoreTokens()) result.append("="); ! } ! return result; ! } ! public static String pruneSpaces(String token) { ! int firstSpace; ! int lastSpace; ! firstSpace = token.indexOf(' '); ! while (firstSpace==0) { ! token = token.substring(1,token.length()); ! firstSpace = token.indexOf(' '); ! } ! lastSpace = token.lastIndexOf(' '); ! while (lastSpace==token.length()-1) { ! token = token.substring(0,token.length()-1); ! lastSpace = token.lastIndexOf(' '); ! } ! return token; ! } /** --- 38,244 ---- public class TagParser { ! public final static int TAG_BEFORE_PARSING_STATE=1; ! public final static int TAG_BEGIN_PARSING_STATE=1<<2; ! public final static int TAG_FINISHED_PARSING_STATE=1<<3; ! public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; ! public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; ! public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! ! private ParserFeedback feedback; ! public TagParser(ParserFeedback feedback) { ! this.feedback = feedback; ! } ! public Tag find(NodeReader reader,String input,int position) { ! int state = TAG_BEFORE_PARSING_STATE; ! int i=position; ! char ch; ! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE ! Tag tag = new Tag(new TagData(position, 0, reader.getLastLineNumber(), 0, "", input, "", false)); ! Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && ! state!=TAG_ILLEGAL_STATE ! ) ! { ! ch = tag.getTagLine().charAt(i); ! state = automataInput(encounteredQuery, i, state, ch, tag, i, ignorechar); ! i = incrementCounter(i, reader, state, tag); ! } ! if (state==TAG_FINISHED_PARSING_STATE) { ! String tagLine = tag.getTagLine(); ! if (i>1 && tagLine.charAt(i-2)=='/') { ! tag.setEmptyXmlTag(true); ! String tagContents = tag.getText(); ! tag.setText(tagContents.substring(0,tagContents.length()-1)); ! } ! return tag; ! } else ! return null; ! } ! private int automataInput(Bool encounteredQuery, int i, int state,char ch, Tag tag, int pos, char[] ignorechar) { ! state = checkIllegalState(i, state, ch, tag); ! state = checkFinishedState(encounteredQuery, i, state, ch, tag, pos); ! state = toggleIgnoringState(state, ch, ignorechar); ! if (state==TAG_BEFORE_PARSING_STATE && ch!='<') { ! state= TAG_ILLEGAL_STATE; ! } ! if (state==TAG_IGNORE_DATA_STATE && ch=='<') { ! // If the next tag char is is close tag, then ! // this is legal, we should continue ! if (!isWellFormedTag(tag,pos)) ! state = TAG_IGNORE_BEGIN_TAG_STATE; ! } ! if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') { ! state = TAG_IGNORE_DATA_STATE; ! } ! checkIfAppendable(encounteredQuery, state, ch, tag); ! state = checkBeginParsingState(i, state, ch, tag); ! return state; ! } ! private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || ! state==TAG_ILLEGAL_STATE)) ! { ! // Transition from State 0 to State 1 - Record data till > i... [truncated message content] |