Re: [Htmlparser-developer] Page.getLine() seems broken.
Brought to you by:
derrickoswald
|
From: Derrick O. <Der...@Ro...> - 2005-09-28 22:26:24
|
It's zero-based, unlike the usual text editor counting.
Matthew Buckett wrote:
> Page.getLine always seems to return the previous line. Attached are
> some tests that show this. It seems that the documentation on
> PageIndex says it should be the index of the first character of the
> line but it is actually set as being the position of the newline.
>
> I've attached a fix to Page.getLine() that makes it work but I don't
> know if the correct fix is to change PageIndex so that the index of the
> start of the line is put in it instead.
>
>------------------------------------------------------------------------
>
>Index: Page.java
>===================================================================
>RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
>retrieving revision 1.51
>diff -u -r1.51 Page.java
>--- Page.java 20 Jun 2005 01:56:32 -0000 1.51
>+++ Page.java 28 Sep 2005 16:16:14 -0000
>@@ -1106,12 +1106,12 @@
> size = mIndex.size ();
> if (line < size)
> {
>- start = mIndex.elementAt (line);
>- line++;
>- if (line <= size)
>- end = mIndex.elementAt (line);
>+ end = mIndex.elementAt (line);
>+ line--;
>+ if (line >= 0)
>+ start = mIndex.elementAt (line);
> else
>- end = mSource.offset ();
>+ start = 0;
> }
> else // current line
> {
>
>
>------------------------------------------------------------------------
>
>/* ======================================================================
>The Bodington System Software License, Version 1.0
>
>Copyright (c) 2001 The University of Leeds. All rights reserved.
>
>Redistribution and use in source and binary forms, with or without
>modification, are permitted provided that the following conditions are
>met:
>
>1. Redistributions of source code must retain the above copyright notice,
>this list of conditions and the following disclaimer.
>
>2. Redistributions in binary form must reproduce the above copyright
>notice, this list of conditions and the following disclaimer in the
>documentation and/or other materials provided with the distribution.
>
>3. The end-user documentation included with the redistribution, if any,
>must include the following acknowledgement: "This product includes
>software developed by the University of Leeds
>(http://www.bodington.org/)." Alternately, this acknowledgement may
>appear in the software itself, if and wherever such third-party
>acknowledgements normally appear.
>
>4. The names "Bodington", "Nathan Bodington", "Bodington System",
>"Bodington Open Source Project", and "The University of Leeds" must not be
>used to endorse or promote products derived from this software without
>prior written permission. For written permission, please contact
>d.g...@le....
>
>5. The name "Bodington" may not appear in the name of products derived
>from this software without prior written permission of the University of
>Leeds.
>
>THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
>WARRANTIES, INCLUDING, BUT NOT LIMITED TO, TITLE, THE IMPLIED WARRANTIES
>OF QUALITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
>EVENT SHALL THE UNIVERSITY OF LEEDS OR ITS CONTRIBUTORS BE LIABLE FOR
>ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
>GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
>HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
>STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
>ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
>POSSIBILITY OF SUCH DAMAGE.
>=========================================================
>
>This software was originally created by the University of Leeds and may contain voluntary
>contributions from others. For more information on the Bodington Open Source Project, please
>see http://bodington.org/
>
>====================================================================== */
>
>package org.htmlparser.tests;
>
>import junit.framework.TestCase;
>
>import org.htmlparser.Node;
>import org.htmlparser.Parser;
>import org.htmlparser.filters.TagNameFilter;
>import org.htmlparser.util.NodeList;
>import org.htmlparser.util.ParserException;
>
>public class LineTests extends TestCase
>{
> public void testGetLine1() throws ParserException {
> Parser parser = getParser();
> NodeList list = parser.parse(new TagNameFilter("h1"));
> Node node = list.elementAt(0);
> assertEquals("<h1>Line 1</h1>\n", node.getPage().getLine(
> node.getStartPosition()));
> }
>
> public void testGetLine2() throws ParserException {
> Parser parser = getParser();
> NodeList list = parser.parse(new TagNameFilter("h2"));
> Node node = list.elementAt(0);
> assertEquals("<h2>Line 2</h2>\n", node.getPage().getLine(
> node.getStartPosition()));
> }
>
> public void testGetLine3() throws ParserException {
> Parser parser = getParser();
> NodeList list = parser.parse(new TagNameFilter("h3"));
> Node node = list.elementAt(0);
> assertEquals("<h3>Line 3</h3>\n", node.getPage().getLine(
> node.getStartPosition()));
> }
>
> public Parser getParser()
> {
> Parser parser = new Parser();
> try
> {
> parser.setInputHTML(
> "<h1>Line 1</h1>\n"+
> "<h2>Line 2</h2>\n"+
> "<h3>Line 3</h3>\n"
> );
> }
> catch (ParserException e)
> {
> fail("Failed to parse");
> }
> return parser;
> }
>}
>
>
|