[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper CompositeTagScannerHelper.java,1.32,1.33
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-05-24 21:04:47
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv7741/org/htmlparser/parserHelper Modified Files: CompositeTagScannerHelper.java StringParser.java Log Message: Fixed bug #741769 ScriptScanner doesn't handle quoted </script> tags. Major overhaul of ScriptScanner. It now uses the scan() method of CompositeTagScanner (i.e. doesn't override). CompositeTagScanner now has a balance_quotes member field that dictates whether strings within its tags are scanned honouring single and double quotes. This affected the call chain through NodeReader and StringScanner which now have this parameter. StringScanner now correctly handles quotes if asked. The ignoreState stuff is removed; it didn't work anyway since a single StringScanner is used recursively by the NodeReader, and the member field would have been tromped. Sorry to all those who have broken code because of this, but it's for the better. Really. Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** CompositeTagScannerHelper.java 19 May 2003 02:49:57 -0000 1.32 --- CompositeTagScannerHelper.java 24 May 2003 21:04:44 -0000 1.33 *************** *** 51,54 **** --- 51,55 ---- private int startingLineNumber; private int endingLineNumber; + private boolean balance_quotes; public CompositeTagScannerHelper( *************** *** 57,61 **** String url, NodeReader reader, ! String currLine) { this.scanner = scanner; --- 58,63 ---- String url, NodeReader reader, ! String currLine, ! boolean balance_quotes) { this.scanner = scanner; *************** *** 67,72 **** this.nodeList = new NodeList(); this.endTagFound = false; } ! 
public Tag scan() throws ParserException { this.startingLineNumber = reader.getLastLineNumber(); --- 69,75 ---- this.nodeList = new NodeList(); this.endTagFound = false; + this.balance_quotes = balance_quotes; } ! public Tag scan() throws ParserException { this.startingLineNumber = reader.getLastLineNumber(); *************** *** 80,84 **** if (!endTagFound) { do { ! currentNode = reader.readElement(); if (currentNode==null) continue; currLine = reader.getCurrentLine(); --- 83,87 ---- if (!endTagFound) { do { ! currentNode = reader.readElement(balance_quotes); if (currentNode==null) continue; currLine = reader.getCurrentLine(); Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** StringParser.java 19 May 2003 02:49:57 -0000 1.20 --- StringParser.java 24 May 2003 21:04:44 -0000 1.21 *************** *** 38,48 **** private final static int PARSE_COMPLETED_STATE=2; private final static int PARSE_IGNORE_STATE=3; - private boolean ignoreStateMode=false; - - public Node find(NodeReader reader,String input,int position) { - return find(reader, input, position, ignoreStateMode); - } - /** * Returns true if the text at <code>pos</code> in <code>line</code> should be scanned as a tag. --- 38,42 ---- *************** *** 76,83 **** * @param input Input String * @param position Position to start parsing from ! * @param ignoreStateMode enter ignoring state - if set, will enter ignoring ! * state on encountering apostrophes */ ! public Node find(NodeReader reader,String input,int position, boolean ignoreStateMode) { StringBuffer textBuffer = new StringBuffer(); --- 70,77 ---- * @param input Input String * @param position Position to start parsing from ! * @param balance_quotes If <code>true</code> enter ignoring state on ! * encountering quotes. */ ! 
public Node find(NodeReader reader,String input,int position, boolean balance_quotes) { StringBuffer textBuffer = new StringBuffer(); *************** *** 87,90 **** --- 81,85 ---- int inputLen = input.length(); char ch; + char ignore_ender = '\"'; for (int i=position;(i<inputLen && state!=PARSE_COMPLETED_STATE);i++) { *************** *** 98,108 **** } } ! if (ignoreStateMode && (ch=='\'' || ch=='"')) { ! if (state==PARSE_IGNORE_STATE) state=PARSE_HAS_BEGUN_STATE; ! else { ! if (input.charAt(i+1)=='<') ! state = PARSE_IGNORE_STATE; } - } if (state==BEFORE_PARSE_BEGINS_STATE) --- 93,108 ---- } } ! if (balance_quotes && (ch=='\'' || ch=='"')) ! { ! if (state==PARSE_IGNORE_STATE) ! { ! if (ch == ignore_ender) ! state=PARSE_HAS_BEGUN_STATE; ! } ! else ! { ! ignore_ender = ch; ! state = PARSE_IGNORE_STATE; } } if (state==BEFORE_PARSE_BEGINS_STATE) *************** *** 140,150 **** return new StringNode(textBuffer,textBegin,textEnd); } - - public boolean isIgnoreStateMode() { - return ignoreStateMode; - } - - public void setIgnoreStateMode(boolean ignoreStateMode) { - this.ignoreStateMode = ignoreStateMode; - } } --- 140,142 ---- |