htmlparser-cvs Mailing List for HTML Parser (Page 54)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(141) |
Jun
(108) |
Jul
(66) |
Aug
(127) |
Sep
(155) |
Oct
(149) |
Nov
(72) |
Dec
(72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 |
Jan
(100) |
Feb
(36) |
Mar
(21) |
Apr
(3) |
May
(87) |
Jun
(28) |
Jul
(84) |
Aug
(5) |
Sep
(14) |
Oct
|
Nov
|
Dec
|
| 2005 |
Jan
(1) |
Feb
(39) |
Mar
(26) |
Apr
(38) |
May
(14) |
Jun
(10) |
Jul
|
Aug
|
Sep
(13) |
Oct
(8) |
Nov
(10) |
Dec
|
| 2006 |
Jan
|
Feb
(1) |
Mar
(17) |
Apr
(20) |
May
(28) |
Jun
(24) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2015 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util
In directory sc8-pr-cvs1:/tmp/cvs-serv7401/htmlparser/src/org/htmlparser/util
Modified Files:
ChainedException.java CommandLine.java
DefaultParserFeedback.java FeedbackManager.java Generate.java
IteratorImpl.java LinkProcessor.java NodeIterator.java
NodeList.java ParserException.java ParserFeedback.java
ParserUtils.java PeekingIterator.java SimpleNodeIterator.java
Translate.java package.html
Log Message:
Update version headers to 1.4-20030622 and update changelog.
Index: ChainedException.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ChainedException.java,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** ChainedException.java 1 Jun 2003 20:50:17 -0000 1.30
--- ChainedException.java 22 Jun 2003 21:37:47 -0000 1.31
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: CommandLine.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/CommandLine.java,v
retrieving revision 1.29
retrieving revision 1.30
diff -C2 -d -r1.29 -r1.30
*** CommandLine.java 1 Jun 2003 20:50:17 -0000 1.29
--- CommandLine.java 22 Jun 2003 21:37:47 -0000 1.30
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: DefaultParserFeedback.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/DefaultParserFeedback.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** DefaultParserFeedback.java 1 Jun 2003 20:50:17 -0000 1.17
--- DefaultParserFeedback.java 22 Jun 2003 21:37:47 -0000 1.18
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: FeedbackManager.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/FeedbackManager.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** FeedbackManager.java 1 Jun 2003 20:50:17 -0000 1.31
--- FeedbackManager.java 22 Jun 2003 21:37:47 -0000 1.32
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: Generate.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Generate.java,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** Generate.java 1 Jun 2003 20:50:18 -0000 1.32
--- Generate.java 22 Jun 2003 21:37:47 -0000 1.33
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: IteratorImpl.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/IteratorImpl.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** IteratorImpl.java 1 Jun 2003 20:50:18 -0000 1.18
--- IteratorImpl.java 22 Jun 2003 21:37:47 -0000 1.19
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: LinkProcessor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/LinkProcessor.java,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** LinkProcessor.java 1 Jun 2003 20:50:18 -0000 1.16
--- LinkProcessor.java 22 Jun 2003 21:37:47 -0000 1.17
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: NodeIterator.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeIterator.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** NodeIterator.java 1 Jun 2003 20:50:18 -0000 1.18
--- NodeIterator.java 22 Jun 2003 21:37:47 -0000 1.19
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: NodeList.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** NodeList.java 13 Jun 2003 20:27:04 -0000 1.31
--- NodeList.java 22 Jun 2003 21:37:47 -0000 1.32
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: ParserException.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserException.java,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** ParserException.java 1 Jun 2003 20:50:18 -0000 1.16
--- ParserException.java 22 Jun 2003 21:37:47 -0000 1.17
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: ParserFeedback.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserFeedback.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** ParserFeedback.java 1 Jun 2003 20:50:18 -0000 1.17
--- ParserFeedback.java 22 Jun 2003 21:37:47 -0000 1.18
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: ParserUtils.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** ParserUtils.java 1 Jun 2003 20:50:18 -0000 1.19
--- ParserUtils.java 22 Jun 2003 21:37:48 -0000 1.20
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: PeekingIterator.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/PeekingIterator.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** PeekingIterator.java 1 Jun 2003 20:50:18 -0000 1.6
--- PeekingIterator.java 22 Jun 2003 21:37:48 -0000 1.7
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: SimpleNodeIterator.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/SimpleNodeIterator.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** SimpleNodeIterator.java 1 Jun 2003 20:50:18 -0000 1.20
--- SimpleNodeIterator.java 22 Jun 2003 21:37:48 -0000 1.21
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: Translate.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Translate.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** Translate.java 19 Jun 2003 17:24:30 -0000 1.26
--- Translate.java 22 Jun 2003 21:37:48 -0000 1.27
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
Index: package.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/package.html,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** package.html 1 Jun 2003 20:50:18 -0000 1.6
--- package.html 22 Jun 2003 21:37:49 -0000 1.7
***************
*** 6,10 ****
@(#)package.html 1.60 98/01/27
! HTMLParser Library v1_4_20030601 - A java-based parser for HTML
Copyright (C) Dec 31, 2000 Somik Raha
--- 6,10 ----
@(#)package.html 1.60 98/01/27
! HTMLParser Library v1_4_20030622 - A java-based parser for HTML
Copyright (C) Dec 31, 2000 Somik Raha
|
|
From: <der...@us...> - 2003-06-22 21:37:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests In directory sc8-pr-cvs1:/tmp/cvs-serv7401/htmlparser/src/org/htmlparser/tests/visitorsTests Modified Files: AllTests.java Log Message: Update version headers to 1.4-20030622 and update changelog. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/AllTests.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** AllTests.java 1 Jun 2003 20:50:17 -0000 1.25 --- AllTests.java 22 Jun 2003 21:37:47 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030622 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
|
From: <jke...@us...> - 2003-06-19 17:24:33
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1:/tmp/cvs-serv16674/src/org/htmlparser/tests
Modified Files:
DecodingNodeTest.java
Log Message:
added support for StringNode Decoration
Index: DecodingNodeTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/DecodingNodeTest.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** DecodingNodeTest.java 17 Jun 2003 03:26:20 -0000 1.1
--- DecodingNodeTest.java 19 Jun 2003 17:24:30 -0000 1.2
***************
*** 18,22 ****
while (nodes.hasMoreNodes())
! decodedContent.append(nodes.nextNode().toHtml());
return decodedContent.toString();
--- 18,22 ----
while (nodes.hasMoreNodes())
! decodedContent.append(nodes.nextNode().toPlainTextString());
return decodedContent.toString();
***************
*** 25,32 ****
public void testAmpersand() throws Exception {
String ENCODED_WORKSHOP_TITLE =
! "<H1>The Testing &amp; Refactoring Workshop</H1>";
String DECODED_WORKSHOP_TITLE =
! "<H1>The Testing & Refactoring Workshop</H1>";
assertEquals(
--- 25,32 ----
public void testAmpersand() throws Exception {
String ENCODED_WORKSHOP_TITLE =
! "The Testing &amp; Refactoring Workshop";
String DECODED_WORKSHOP_TITLE =
! "The Testing & Refactoring Workshop";
assertEquals(
|
|
From: <jke...@us...> - 2003-06-19 17:24:33
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper
In directory sc8-pr-cvs1:/tmp/cvs-serv16674/src/org/htmlparser/parserHelper
Modified Files:
StringParser.java
Log Message:
added support for StringNode Decoration
Index: StringParser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** StringParser.java 17 Jun 2003 03:26:19 -0000 1.26
--- StringParser.java 19 Jun 2003 17:24:30 -0000 1.27
***************
*** 29,33 ****
package org.htmlparser.parserHelper;
! import org.htmlparser.AbstractNode;
import org.htmlparser.NodeReader;
import org.htmlparser.Parser;
--- 29,33 ----
package org.htmlparser.parserHelper;
! import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.Parser;
***************
*** 74,78 ****
* encountering quotes.
*/
! public AbstractNode find(NodeReader reader,String input,int position, boolean balance_quotes)
{
StringBuffer textBuffer = new StringBuffer();
--- 74,78 ----
* encountering quotes.
*/
! public Node find(NodeReader reader,String input,int position, boolean balance_quotes)
{
StringBuffer textBuffer = new StringBuffer();
***************
*** 139,144 ****
}
}
! return new StringNode(textBuffer, textBegin, textEnd,
! reader.getParser().shouldDecodeNodes());
}
}
--- 139,144 ----
}
}
! return StringNode.createStringNode(textBuffer, textBegin, textEnd,
! reader.getParser().shouldDecodeNodes());
}
}
|
|
From: <jke...@us...> - 2003-06-19 17:24:32
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util
In directory sc8-pr-cvs1:/tmp/cvs-serv16674/src/org/htmlparser/util
Modified Files:
Translate.java
Log Message:
added support for StringNode Decoration
Index: Translate.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Translate.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** Translate.java 1 Jun 2003 20:50:18 -0000 1.25
--- Translate.java 19 Jun 2003 17:24:30 -0000 1.26
***************
*** 470,473 ****
--- 470,477 ----
return (ret);
}
+
+ public static String decode (StringBuffer stringBuffer) {
+ return decode(stringBuffer.toString());
+ }
/**
|
|
From: <jke...@us...> - 2003-06-19 17:24:32
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1:/tmp/cvs-serv16674/src/org/htmlparser
Modified Files:
Node.java StringNode.java AbstractNode.java
Added Files:
DecodingNode.java
Log Message:
added support for StringNode Decoration
--- NEW FILE: DecodingNode.java ---
package org.htmlparser;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.Translate;
import org.htmlparser.visitors.NodeVisitor;
/**
 * Decorator around another {@link Node}.
 *
 * The only behaviour added here is in {@link #toPlainTextString()}, whose
 * result is run through {@code Translate.decode}. Every other method is
 * forwarded unchanged to the wrapped node.
 */
public class DecodingNode implements Node {

    /** The node being decorated; all calls end up here. */
    private Node wrapped;

    /**
     * Wraps the given node.
     * @param node The node whose plain text should be decoded on retrieval.
     */
    protected DecodingNode(Node node) {
        this.wrapped = node;
    }

    // ---- decorated behaviour ------------------------------------------

    /**
     * Returns the wrapped node's plain text after passing it through
     * {@code Translate.decode}.
     */
    public String toPlainTextString() {
        String plainText = wrapped.toPlainTextString();
        return Translate.decode(plainText);
    }

    // ---- straight pass-through: rendering -----------------------------

    /** Forwards to the wrapped node; the HTML form is not decoded. */
    public String toHtml() {
        return wrapped.toHtml();
    }

    /** Forwards to the wrapped node; the HTML form is not decoded. */
    public String toHTML() {
        return wrapped.toHTML();
    }

    // ---- straight pass-through: text ----------------------------------

    /** Forwards to the wrapped node. */
    public String getText() {
        return wrapped.getText();
    }

    /** Forwards to the wrapped node. */
    public void setText(String text) {
        wrapped.setText(text);
    }

    // ---- straight pass-through: position ------------------------------

    /** Forwards to the wrapped node. */
    public int elementBegin() {
        return wrapped.elementBegin();
    }

    /** Forwards to the wrapped node. */
    public int elementEnd() {
        return wrapped.elementEnd();
    }

    // ---- straight pass-through: tree structure ------------------------

    /** Forwards to the wrapped node. */
    public CompositeTag getParent() {
        return wrapped.getParent();
    }

    /** Forwards to the wrapped node. */
    public void setParent(CompositeTag tag) {
        wrapped.setParent(tag);
    }

    // ---- straight pass-through: traversal and collection --------------

    /** Forwards to the wrapped node, which receives the visitor itself. */
    public void accept(NodeVisitor visitor) {
        wrapped.accept(visitor);
    }

    /** Forwards to the wrapped node. */
    public void collectInto(NodeList collectionList, Class nodeType) {
        wrapped.collectInto(collectionList, nodeType);
    }

    /** Forwards to the wrapped node. */
    public void collectInto(NodeList collectionList, String filter) {
        wrapped.collectInto(collectionList, filter);
    }
}
Index: Node.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** Node.java 17 Jun 2003 03:26:19 -0000 1.25
--- Node.java 19 Jun 2003 17:24:30 -0000 1.26
***************
*** 113,116 ****
*/
public abstract void setParent(CompositeTag tag);
!
! }
\ No newline at end of file
--- 113,126 ----
*/
public abstract void setParent(CompositeTag tag);
!
! /**
! * Returns the text of the string line
! */
! public String getText();
!
! /**
! * Sets the string contents of the node.
! * @param The new text for the node.
! */
! public void setText(String text);
! }
Index: StringNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** StringNode.java 17 Jun 2003 03:26:19 -0000 1.21
--- StringNode.java 19 Jun 2003 17:24:30 -0000 1.22
***************
*** 31,35 ****
import org.htmlparser.util.NodeList;
- import org.htmlparser.util.Translate;
import org.htmlparser.visitors.NodeVisitor;
--- 31,34 ----
***************
*** 39,47 ****
public class StringNode extends AbstractNode
{
- /**
- * boolean to tell whether decoding of this node should happen or not.
- */
- private boolean shouldDecode = false;;
-
public static final String STRING_FILTER="-string";
--- 38,41 ----
***************
*** 63,78 ****
}
!
! public StringNode(StringBuffer textBuffer, int textBegin, int textEnd,
! boolean shouldDecode) {
! this(textBuffer, textBegin, textEnd);
! this.shouldDecode = shouldDecode;
}
/**
* Returns the text of the string line
*/
! public String getText()
! {
return textBuffer.toString();
}
--- 57,73 ----
}
!
! public static Node createStringNode(
! StringBuffer textBuffer, int textBegin, int textEnd, boolean shouldDecode) {
! if (shouldDecode)
! return new DecodingNode(new StringNode(textBuffer, textBegin, textEnd));
! return new StringNode(textBuffer, textBegin, textEnd);
}
+
/**
* Returns the text of the string line
*/
! public String getText() {
return textBuffer.toString();
}
***************
*** 87,102 ****
public String toPlainTextString() {
! return nodeContents();
}
public String toHtml() {
! return nodeContents();
! }
!
! private String nodeContents() {
! String result = textBuffer.toString();
! if (shouldDecode)
! result = Translate.decode(result);
! return result;
}
--- 82,90 ----
public String toPlainTextString() {
! return textBuffer.toString();
}
public String toHtml() {
! return textBuffer.toString();
}
***************
*** 104,107 ****
--- 92,96 ----
return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd();
}
+
public void collectInto(NodeList collectionList, String filter) {
if (filter==STRING_FILTER) collectionList.add(this);
***************
*** 111,114 ****
visitor.visitStringNode(this);
}
-
}
--- 100,102 ----
Index: AbstractNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** AbstractNode.java 13 Jun 2003 20:27:04 -0000 1.1
--- AbstractNode.java 19 Jun 2003 17:24:30 -0000 1.2
***************
*** 190,193 ****
--- 190,208 ----
parent = tag;
}
+
+ /**
+ * Returns the text of the string line
+ */
+ public String getText() {
+ return null;
+ }
+
+ /**
+ * Sets the string contents of the node.
+ * @param The new text for the node.
+ */
+ public void setText(String text) {
+
+ }
}
|
|
From: <jke...@us...> - 2003-06-17 03:26:23
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1:/tmp/cvs-serv18761/src/org/htmlparser/tests
Modified Files:
AllTests.java
Added Files:
DecodingNodeTest.java
Log Message:
add setting for parser to perform the Translate.decode() on all StringNodes. This will later be refactored to a Decorator implementation.
--- NEW FILE: DecodingNodeTest.java ---
package org.htmlparser.tests;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.ParserException;
/**
 * Tests that a parser with node decoding switched on turns character
 * references in string nodes into the characters they stand for.
 *
 * Each test parses an input containing character references and compares
 * the concatenated {@code toHtml()} output of all nodes with the expected
 * decoded text.
 *
 * NOTE(review): in the archived copy of this file the character references
 * in the ENCODED_* literals had been replaced by the characters themselves,
 * which made the inputs identical to the expected outputs. They are written
 * out as references again below. The parenthesised division sign in
 * testReferencesInString is kept as a literal character because the
 * archived text does not show which form it originally had.
 */
public class DecodingNodeTest extends ParserTestCase {

    public DecodingNodeTest(String name) {
        super(name);
    }

    /**
     * Parses the given text with node decoding enabled and returns the
     * concatenation of {@code toHtml()} over every node produced.
     * @param STRING_TO_DECODE The markup to parse.
     * @return The joined HTML of all parsed nodes.
     * @throws ParserException If the parser fails while iterating.
     */
    private String parseToObtainDecodedResult(String STRING_TO_DECODE)
        throws ParserException {
        StringBuffer decodedContent = new StringBuffer();
        createParser(STRING_TO_DECODE);
        parser.setNodeDecoding(true); // tell parser to decode StringNodes
        NodeIterator nodes = parser.elements();

        while (nodes.hasMoreNodes()) {
            decodedContent.append(nodes.nextNode().toHtml());
        }

        return decodedContent.toString();
    }

    /** A named reference for the ampersand inside tag content is decoded. */
    public void testAmpersand() throws Exception {
        String ENCODED_WORKSHOP_TITLE =
            "<H1>The Testing &amp; Refactoring Workshop</H1>";

        String DECODED_WORKSHOP_TITLE =
            "<H1>The Testing & Refactoring Workshop</H1>";

        assertEquals(
            "ampersand in string",
            DECODED_WORKSHOP_TITLE,
            parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE));
    }

    /** A decimal numeric reference is decoded to its character. */
    public void testNumericReference() throws Exception {
        String ENCODED_DIVISION_SIGN =
            "&#247; is the division sign.";

        String DECODED_DIVISION_SIGN =
            "÷ is the division sign.";

        assertEquals(
            "numeric reference for division sign",
            DECODED_DIVISION_SIGN,
            parseToObtainDecodedResult(ENCODED_DIVISION_SIGN));
    }

    /** Named and numeric references embedded in running text are decoded. */
    public void testReferencesInString () throws Exception {
        String ENCODED_REFERENCE_IN_STRING =
            "Thus, the character entity reference &divide; is a more convenient" +
            " form than &#247; for obtaining the division sign (÷)";

        String DECODED_REFERENCE_IN_STRING =
            "Thus, the character entity reference ÷ is a more convenient" +
            " form than ÷ for obtaining the division sign (÷)";

        assertEquals (
            "character references within a string",
            DECODED_REFERENCE_IN_STRING,
            parseToObtainDecodedResult(ENCODED_REFERENCE_IN_STRING));
    }

    /** An unknown entity name is expected to pass through untouched. */
    public void testBogusCharacterEntityReference() throws Exception {
        String ENCODED_BOGUS_CHARACTER_ENTITY =
            "The character entity reference &divode; is bogus";

        String DECODED_BOGUS_CHARACTER_ENTITY =
            "The character entity reference &divode; is bogus";

        assertEquals (
            "bogus character entity reference",
            DECODED_BOGUS_CHARACTER_ENTITY,
            parseToObtainDecodedResult(ENCODED_BOGUS_CHARACTER_ENTITY));
    }
}
Index: AllTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AllTests.java,v
retrieving revision 1.39
retrieving revision 1.40
diff -C2 -d -r1.39 -r1.40
*** AllTests.java 1 Jun 2003 20:50:14 -0000 1.39
--- AllTests.java 17 Jun 2003 03:26:20 -0000 1.40
***************
*** 104,107 ****
--- 104,108 ----
TestSuite basic = new TestSuite("Basic Tests");
basic.addTestSuite(ParserTest.class);
+ basic.addTestSuite(DecodingNodeTest.class);
suite.addTest(basic);
suite.addTest(org.htmlparser.tests.scannersTests.AllTests.suite());
|
|
From: <jke...@us...> - 2003-06-17 03:26:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv18761/src/org/htmlparser/parserHelper Modified Files: StringParser.java Log Message: add setting for parser to perform the Translate.decode() on all StringNodes. This will later be refactored to a Decorator implementation. Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** StringParser.java 13 Jun 2003 20:27:04 -0000 1.25 --- StringParser.java 17 Jun 2003 03:26:19 -0000 1.26 *************** *** 139,143 **** } } ! return new StringNode(textBuffer,textBegin,textEnd); } } --- 139,144 ---- } } ! return new StringNode(textBuffer, textBegin, textEnd, ! reader.getParser().shouldDecodeNodes()); } } |
|
From: <jke...@us...> - 2003-06-17 03:26:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1:/tmp/cvs-serv18761/src/org/htmlparser
Modified Files:
Node.java Parser.java StringNode.java
Log Message:
add setting for parser to perform the Translate.decode() on all StringNodes. This will later be refactored to a Decorator implementation.
Index: Node.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** Node.java 13 Jun 2003 20:27:04 -0000 1.24
--- Node.java 17 Jun 2003 03:26:19 -0000 1.25
***************
*** 113,115 ****
--- 113,116 ----
*/
public abstract void setParent(CompositeTag tag);
+
}
Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -d -r1.44 -r1.45
*** Parser.java 13 Jun 2003 20:27:04 -0000 1.44
--- Parser.java 17 Jun 2003 03:26:19 -0000 1.45
***************
*** 183,186 ****
--- 183,193 ----
/**
+ * Flag to tell the parser to decode nodes while parsing.
+ * Decoding occurs via the method, org.htmlparser.util.Translate.decode()
+ */
+ private boolean shouldDecodeNodes = false;
+
+
+ /**
* Feedback object.
*/
***************
*** 1208,1211 ****
--- 1215,1229 ----
public static String getLineSeparator() {
return lineSeparator;
+ }
+
+ /**
+ * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode()
+ */
+ public void setNodeDecoding(boolean shouldDecodeNodes) {
+ this.shouldDecodeNodes = shouldDecodeNodes;
+ }
+
+ public boolean shouldDecodeNodes() {
+ return shouldDecodeNodes;
}
}
Index: StringNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** StringNode.java 13 Jun 2003 20:27:04 -0000 1.20
--- StringNode.java 17 Jun 2003 03:26:19 -0000 1.21
***************
*** 31,34 ****
--- 31,35 ----
import org.htmlparser.util.NodeList;
+ import org.htmlparser.util.Translate;
import org.htmlparser.visitors.NodeVisitor;
***************
*** 38,42 ****
--- 39,49 ----
public class StringNode extends AbstractNode
{
+ /**
+ * boolean to tell whether decoding of this node should happen or not.
+ */
+ private boolean shouldDecode = false;;
+
public static final String STRING_FILTER="-string";
+
/**
* The text of the string.
***************
*** 57,60 ****
--- 64,72 ----
}
+ public StringNode(StringBuffer textBuffer, int textBegin, int textEnd,
+ boolean shouldDecode) {
+ this(textBuffer, textBegin, textEnd);
+ this.shouldDecode = shouldDecode;
+ }
/**
***************
*** 73,82 ****
textBuffer = new StringBuffer (text);
}
public String toPlainTextString() {
! return textBuffer.toString();
}
public String toHtml() {
! return textBuffer.toString();
}
public String toString() {
return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd();
--- 85,104 ----
textBuffer = new StringBuffer (text);
}
+
public String toPlainTextString() {
! return nodeContents();
}
+
public String toHtml() {
! return nodeContents();
}
+
+ private String nodeContents() {
+ String result = textBuffer.toString();
+ if (shouldDecode)
+ result = Translate.decode(result);
+ return result;
+ }
+
public String toString() {
return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd();
|
|
From: <so...@us...> - 2003-06-17 01:52:28
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv7764/src/org/htmlparser/tests/temporaryFailures Added Files: AttributeParserTest.java TagParserTest.java Log Message: added temporaryFailures package --- NEW FILE: AttributeParserTest.java --- // HTMLParser Library v1_4_20030601 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com // ---- IMPORTANT: This class has failing tests ---- // Original Location: org.htmlparser.tests.parserHelperTests; // Pls rememeber to add test back to org.htmlparser.tests.parserHelperTests.AllTests.suite() // and delete these comments when you're done. 
// ---- NEEDS FIXING ---- package org.htmlparser.tests.temporaryFailures; import java.util.Hashtable; import org.htmlparser.Parser; import org.htmlparser.parserHelper.AttributeParser; import org.htmlparser.tags.Tag; import org.htmlparser.tags.data.TagData; import org.htmlparser.tests.ParserTestCase; public class AttributeParserTest extends ParserTestCase { private AttributeParser parser; private Tag tag; private Hashtable table; public AttributeParserTest(String name) { super(name); } protected void setUp() { parser = new AttributeParser(); } public void getParameterTableFor(String tagContents) { tag = new Tag(new TagData(0,0,tagContents,"")); table = parser.parseAttributes(tag); } public void testParseParameters() { getParameterTableFor("a b = \"c\""); assertEquals("Value","c",table.get("B")); } public void testParseTokenValues() { getParameterTableFor("a b = \"'\""); assertEquals("Value","'",table.get("B")); } public void testParseEmptyValues() { getParameterTableFor("a b = \"\""); assertEquals("Value","",table.get("B")); } public void testParseMissingEqual() { getParameterTableFor("a b\"c\""); assertEquals("ValueB","",table.get("B")); } public void testTwoParams(){ getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n"); assertEquals("Param1","Param1",table.get("NAME")); assertEquals("Somik","Somik",table.get("VALUE")); } public void testPlainParams(){ getParameterTableFor("PARAM NAME=Param1 VALUE=Somik"); assertEquals("Param1","Param1",table.get("NAME")); assertEquals("Somik","Somik",table.get("VALUE")); } public void testValueMissing() { getParameterTableFor("INPUT type=\"checkbox\" name=\"Authorize\" value=\"Y\" checked"); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); assertEquals("Type","checkbox",table.get("TYPE")); assertEquals("Name","Authorize",table.get("NAME")); assertEquals("Value","Y",table.get("VALUE")); assertEquals("Checked","",table.get("CHECKED")); } /** * This is a simulation of a bug reported by Dhaval Udani - wherein * 
a space before the end of the tag causes a problem - there is a key * in the table with just a space in it and an empty value */ public void testIncorrectSpaceKeyBug() { getParameterTableFor("TEXTAREA name=\"Remarks\" "); // There should only be two keys.. assertEquals("There should only be two keys",2,table.size()); // The first key is name String key1 = "NAME"; String value1 = (String)table.get(key1); assertEquals("Expected value 1", "Remarks",value1); String key2 = Tag.TAGNAME; assertEquals("Expected Value 2","TEXTAREA",table.get(key2)); } public void testNullTag(){ getParameterTableFor("INPUT type="); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); assertEquals("Type","",table.get("TYPE")); } public void testAttributeWithSpuriousEqualTo() { getParameterTableFor( "a class=rlbA href=/news/866201.asp?0sl=-32" ); assertStringEquals( "href", "/news/866201.asp?0sl=-32", (String)table.get("HREF") ); } public void testQuestionMarksInAttributes() { getParameterTableFor( "a href=\"mailto:sa...@ne...?subject=Site Comments\"" ); assertStringEquals( "href", "mailto:sa...@ne...?subject=Site Comments", (String)table.get("HREF") ); assertStringEquals( "tag name", "A", (String)table.get(Tag.TAGNAME) ); } /** * Believe it or not Moi (vincent_aumont) wants htmlparser to parse a text file * containing something that looks nearly like a tag: * <pre> * " basic_string<char, string_char_traits<char>, <>>::basic_string()" * </pre> * This was throwing a null pointer exception when the empty <> was encountered. * Bug #725420 NPE in StringBean.visitTag **/ public void testEmptyTag () { getParameterTableFor(""); assertNotNull ("No Tag.TAGNAME",table.get(Tag.TAGNAME)); } /** * Test attributes when they contain scriptlets. * Submitted by Cory Seefurth * See also feature request #725376 Handle script in attributes. * Only perform this test if it's version 1.4 or higher. 
*/ public void testJspWithinAttributes() { Parser parser; parser = new Parser (); if (1.4 <= Parser.getVersionNumber ()) { getParameterTableFor( "a href=\"<%=Application(\"sURL\")%>/literature/index.htm" ); assertStringEquals( "href", "<%=Application(\"sURL\")%>/literature/index.htm", (String)table.get("HREF") ); } } /** * Test Script in attributes. * See feature request #725376 Handle script in attributes. * Only perform this test if it's version 1.4 or higher. */ public void testScriptedTag () { Parser parser; parser = new Parser (); if (1.4 <= Parser.getVersionNumber ()) { getParameterTableFor("body onLoad=defaultStatus=''"); String name = (String)table.get(Tag.TAGNAME); assertNotNull ("No Tag.TAGNAME", name); assertStringEquals("tag name parsed incorrectly", "BODY", name); String value = (String)table.get ("ONLOAD"); assertStringEquals ("parameter parsed incorrectly", "defaultStatus=''", value); } } } --- NEW FILE: TagParserTest.java --- // HTMLParser Library v1_4_20030601 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic, Inc. 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com // ---- IMPORTANT: This class has failing tests ---- // Original Location: org.htmlparser.tests.parserHelperTests; // Pls rememeber to add test back to org.htmlparser.tests.parserHelperTests.AllTests.suite() // and delete these comments when you're done. // ---- NEEDS FIXING ---- package org.htmlparser.tests.temporaryFailures; import java.util.HashMap; import java.util.Map; import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.Tag; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.ParserException; public class TagParserTest extends ParserTestCase { private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + "<!-- Server: sf-web2 -->" + "<html lang=\"en\">" + " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" + " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>" + " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">" + " <!--" + " function help_window(helpurl) {" + " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');" + " }" + " // -->" + " </SCRIPT>" + " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">" + "<!-- This is temp javascript for the jump button. 
If we could actually have a jump script on the server side that would be ideal -->" + "<script language=\"JavaScript\" type=\"text/javascript\">" + "<!--" + " function jump(targ,selObj,restore){ //v3.0" + " if (selObj.options[selObj.selectedIndex].value) " + " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");" + " if (restore) selObj.selectedIndex=0;" + " }" + " //-->" + "</script>" + "<a href=\"http://normallink.com/sometext.html\">" + "<style type=\"text/css\">" + "<!--" + "A:link { text-decoration:none }" + "A:visited { text-decoration:none }" + "A:active { text-decoration:none }" + "A:hover { text-decoration:underline; color:#0066FF; }" + "-->" + "</style>" + "</head>" + "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">"; private Map results; private int testProgress; public TagParserTest(String name) { super(name); } public void testTagWithQuotes() throws Exception { String testHtml = "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; createParser(testHtml); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); assertStringEquals( "html", "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">", tag.toHtml() ); } public void testEmptyTag() throws Exception { createParser("<custom/>"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("tag name","CUSTOM",tag.getTagName()); assertTrue("empty tag",tag.isEmptyXmlTag()); assertStringEquals( "html", "<CUSTOM/>", tag.toHtml() ); } public void testTagWithCloseTagSymbolInAttribute() throws 
ParserException { createParser("<tag att=\"a>b\">"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a>b",tag.getAttribute("att")); } public void testTagWithOpenTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a<b\">"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a<b",tag.getAttribute("att")); } public void testTagWithSingleQuote() throws ParserException { createParser("<tag att=\'a<b\'>"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("html","<TAG ATT=\"a<b\">",tag.toHtml()); assertStringEquals("attribute","a<b",tag.getAttribute("att")); } /** * The following multi line test cases are from * bug #725749 Parser does not handle < and > in multi-line attributes * submitted by Joe Robins (zorblak) */ public void testMultiLine1 () throws ParserException { createParser("<meta name=\"foo\" content=\"foo<bar>\">"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"foo<bar>\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo<bar>", attribute2); } public void testMultiLine2 () throws ParserException { createParser("<meta name=\"foo\" content=\"foo<bar\">"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"foo<bar\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals 
("attribute 2","foo<bar", attribute2); } public void testMultiLine3 () throws ParserException { createParser("<meta name=\"foo\" content=\"foobar>\">"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"foobar>\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foobar>", attribute2); } public void testMultiLine4 () throws ParserException { createParser("<meta name=\"foo\" content=\"foo\nbar>\">"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"foo\r\nbar>\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo\r\nbar>", attribute2); } /** * Test multiline tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. * Only perform this test if it's version 1.4 or higher. */ public void testMultiLine5 () throws ParserException { // <meta name="foo" content="<foo> // bar"> createParser("<meta name=\"foo\" content=\"<foo>\nbar\">"); if (1.4 <= Parser.getVersionNumber ()) { parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"<foo>\r\nbar\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo>\r\nbar", attribute2); } } /** * Test multiline broken tag like attribute. 
* See feature request #725749 Handle < and > in multi-line attributes. * Only perform this test if it's version 1.4 or higher. */ public void testMultiLine6 () throws ParserException { // <meta name="foo" content="foo> // bar"> createParser("<meta name=\"foo\" content=\"foo>\nbar\">"); if (1.4 <= Parser.getVersionNumber ()) { parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"foo>\r\nbar\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo>\r\nbar", attribute2); } } /** * Test multiline split tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. * Only perform this test if it's version 1.4 or higher. */ public void testMultiLine7 () throws ParserException { // <meta name="foo" content="<foo // bar"> createParser("<meta name=\"foo\" content=\"<foo\nbar\""); if (1.4 <= Parser.getVersionNumber ()) { parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<META CONTENT=\"<foo\r\nbar\" NAME=\"foo\">", html); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo\r\nbar", attribute2); } } /** * End of multi line test cases. */ /** * Test multiple threads running against the parser. * See feature request #736144 Handle multi-threaded operation. * Only perform this test if it's version 1.4 or higher. 
*/ public void testThreadSafety() throws Exception { createParser("<html></html>"); if (1.4 <= Parser.getVersionNumber ()) { String testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" + TEST_HTML; String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" + TEST_HTML; ParsingThread parsingThread [] = new ParsingThread[100]; results = new HashMap(); testProgress = 0; for (int i=0;i<parsingThread.length;i++) { if (i<parsingThread.length/2) parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length); else parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length); Thread thread = new Thread(parsingThread[i]); thread.start(); } int completionValue = computeCompletionValue(parsingThread.length); do { try { Thread.sleep(50); } catch (InterruptedException e) { } } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { if (!parsingThread[i].passed()) { assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1()); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); if (i<parsingThread.length/2) { assertStringEquals( "Thread "+i+", link 1:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink1().getLink() ); assertStringEquals( "Thread "+i+", link 2:", "http://normallink.com/sometext.html", parsingThread[i].getLink2().getLink() ); } else { assertStringEquals( "Thread "+i+", link 1:", "http://normallink.com/sometext.html", parsingThread[i].getLink1().getLink() ); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); assertStringEquals( "Thread "+i+", link 2:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink2().getLink() ); } } } } } private int computeCompletionValue(int numThreads) { 
return numThreads * (numThreads - 1) / 2; } class ParsingThread implements Runnable { Parser parser; int id; LinkTag link1, link2; boolean result; int max; ParsingThread(int id, String testHtml, int max) { this.id = id; this.max = max; this.parser = Parser.createParser(testHtml); parser.registerScanners(); } public void run() { try { result = false; Node linkTag [] = parser.extractAllNodesThatAre(LinkTag.class); link1 = (LinkTag)linkTag[0]; link2 = (LinkTag)linkTag[1]; if (id<max/2) { if (link1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && link2.getLink().equals("http://normallink.com/sometext.html")) result = true; } else { if (link1.getLink().equals("http://normallink.com/sometext.html") && link2.getLink().equals("http://normallink.com/sometext.html")) result = true; } } catch (ParserException e) { System.err.println("Parser Exception"); e.printStackTrace(); } finally { testProgress += id; } } public LinkTag getLink1() { return link1; } public LinkTag getLink2() { return link2; } public boolean passed() { return result; } } } |
|
From: <so...@us...> - 2003-06-17 01:52:28
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests
In directory sc8-pr-cvs1:/tmp/cvs-serv7764/src/org/htmlparser/tests/parserHelperTests
Modified Files:
AllTests.java
Removed Files:
AttributeParserTest.java TagParserTest.java
Log Message:
added temporaryFailures package
Index: AllTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AllTests.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** AllTests.java 1 Jun 2003 20:50:14 -0000 1.17
--- AllTests.java 17 Jun 2003 01:52:25 -0000 1.18
***************
*** 28,32 ****
package org.htmlparser.tests.parserHelperTests;
! import junit.framework.TestSuite;
public class AllTests extends junit.framework.TestCase
--- 28,32 ----
package org.htmlparser.tests.parserHelperTests;
! import junit.framework.*;
public class AllTests extends junit.framework.TestCase
***************
*** 39,48 ****
public static TestSuite suite() {
TestSuite suite = new TestSuite("Parser Helper Tests");
!
! suite.addTestSuite(AttributeParserTest.class);
suite.addTestSuite(CompositeTagScannerHelperTest.class);
suite.addTestSuite(RemarkNodeParserTest.class);
suite.addTestSuite(StringParserTest.class);
! suite.addTestSuite(TagParserTest.class);
return suite;
--- 39,49 ----
public static TestSuite suite() {
TestSuite suite = new TestSuite("Parser Helper Tests");
! // To-do: Test below should be enabled after it passes
! // suite.addTestSuite(AttributeParserTest.class);
suite.addTestSuite(CompositeTagScannerHelperTest.class);
suite.addTestSuite(RemarkNodeParserTest.class);
suite.addTestSuite(StringParserTest.class);
! // To-do: Test below should be enabled after it passes
! // suite.addTestSuite(TagParserTest.class);
return suite;
--- AttributeParserTest.java DELETED ---
--- TagParserTest.java DELETED ---
|
|
From: <so...@us...> - 2003-06-17 01:52:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv7744/src/org/htmlparser/tests/temporaryFailures Log Message: Directory /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures added to the repository |
|
From: <so...@us...> - 2003-06-17 01:38:26
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1:/tmp/cvs-serv6437/src/org/htmlparser/tags
Modified Files:
Tag.java
Log Message:
new AttributeParser created every time attributes are to be parsed.
Done to achieve thread-safety.
Index: Tag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** Tag.java 13 Jun 2003 20:27:05 -0000 1.31
--- Tag.java 17 Jun 2003 01:38:23 -0000 1.32
***************
*** 67,71 ****
private final static String EMPTY_STRING="";
! private static AttributeParser paramParser = new AttributeParser();
private static TagParser tagParser;
/**
--- 67,71 ----
private final static String EMPTY_STRING="";
! private AttributeParser attributeParser;
private static TagParser tagParser;
/**
***************
*** 181,185 ****
*/
private Hashtable parseAttributes(){
! return paramParser.parseAttributes(this);
}
--- 181,186 ----
*/
private Hashtable parseAttributes(){
! attributeParser = new AttributeParser();
! return attributeParser.parseAttributes(this);
}
|
|
From: <so...@us...> - 2003-06-17 01:35:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper
In directory sc8-pr-cvs1:/tmp/cvs-serv6124/src/org/htmlparser/parserHelper
Modified Files:
AttributeParser.java
Log Message:
refactored AttributeParser to be more readable
Index: AttributeParser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** AttributeParser.java 1 Jun 2003 20:50:10 -0000 1.25
--- AttributeParser.java 17 Jun 2003 01:35:04 -0000 1.26
***************
*** 44,55 ****
*/
public class AttributeParser {
! private final String delima = " \t\r\n\f=\"'>";
! private final String delimb = " \t\r\n\f\"'>";
! private final char doubleQuote = '\"';
! private final char singleQuote = '\'';
private String delim;
/**
* Method to break the tag into pieces.
--- 44,68 ----
*/
public class AttributeParser {
! private final String DELIMETERS = " \t\r\n\f=\"'>";
! private final String DELIMETERS_WITHOUT_EQUALS = " \t\r\n\f\"'>";
! private final char DOUBLE_QUOTE = '\"';
! private final char SINGLE_QUOTE = '\'';
private String delim;
+ private Hashtable attributeTable;
+ private String element;
+ private String name;
+ private String value;
+ private String part;
+ private String empty;
+ private boolean equal;
+ private StringTokenizer tokenizer;
+ private boolean doubleQuote;
+ private boolean singleQuote;
+ private boolean ready;
+ private String currentToken;
+ private String tokenAccumulator;
/**
* Method to break the tag into pieces.
***************
*** 91,149 ****
*
*/
! public Hashtable parseAttributes(Tag tag){
! Hashtable h = new Hashtable();
! String element,name,value,nextPart=null;
! String empty=null;
name=null;
value=null;
element=null;
! boolean waitingForEqual=false;
! delim=delima;
! StringTokenizer tokenizer = new StringTokenizer(tag.getText(),delim,true);
while (true) {
! nextPart=getNextPart(tokenizer,delim);
! delim=delima;
! if (element==null && nextPart != null && !nextPart.equals("=")){
! element = nextPart;
! putDataIntoTable(h,element,null,true);
}
else {
! if (nextPart != null && (0 < nextPart.length ())) {
! if (name == null) {
! if (!nextPart.substring(0,1).equals(" ")) {
! name = nextPart;
! waitingForEqual=true;
! }
! }
! else {
! if (waitingForEqual){
! if (nextPart.equals("=")) {
! waitingForEqual=false;
! delim=delimb;
! }
! else {
! putDataIntoTable(h,name,"",false);
! name=nextPart;
! value=null;
! }
! }
! if (!waitingForEqual && !nextPart.equals("=")) {
! value=nextPart;
! putDataIntoTable(h,name,value,false);
! name=null;
! value=null;
! }
! }
}
else {
! if (name != null) {
! if (name.equals("/")) {
! putDataIntoTable(h,Tag.EMPTYTAG,"",false);
! } else {
! putDataIntoTable(h,name,"",false);
! }
! name=null;
! value=null;
! }
break;
}
--- 104,130 ----
*
*/
! public Hashtable parseAttributes(Tag tag) {
! attributeTable = new Hashtable();
! part = null;
! empty = null;
name=null;
value=null;
element=null;
! equal = false;
! delim=DELIMETERS;
! tokenizer = new StringTokenizer(tag.getText(),delim,true);
while (true) {
! part=getNextPartUsing(delim);
! delim=DELIMETERS;
! if (element==null && part != null && !part.equals("=")){
! element = part;
! putDataIntoTable(attributeTable,element,null,true);
}
else {
! if (isValid(part)) {
! process(part);
}
else {
! processInvalidPart();
break;
}
***************
*** 151,207 ****
}
if (null == element) // handle no tag contents
! putDataIntoTable(h,"",null,true);
! return h;
}
! private String getNextPart(StringTokenizer tokenizer,String deli){
! String tokenAccumulator=null;
! boolean isDoubleQuote=false;
! boolean isSingleQuote=false;
! boolean isDataReady=false;
! String currentToken;
! while (isDataReady == false && tokenizer.hasMoreTokens()) {
! currentToken = tokenizer.nextToken(deli);
! //
! // First let's combine tokens that are inside "" or ''
! //
! if (isDoubleQuote || isSingleQuote) {
! if (isDoubleQuote && currentToken.charAt(0)==doubleQuote){
! isDoubleQuote= false;
! isDataReady=true;
! } else if (isSingleQuote && currentToken.charAt(0)==singleQuote) {
! isSingleQuote=false;
! isDataReady=true;
! }else {
! tokenAccumulator += currentToken;
! continue;
! }
! } else if (currentToken.charAt(0)==doubleQuote){
! isDoubleQuote= true;
! tokenAccumulator = "";
! continue;
! } else if (currentToken.charAt(0)==singleQuote){
! isSingleQuote=true;
! tokenAccumulator="";
! continue;
! } else tokenAccumulator = currentToken;
! if (tokenAccumulator.equals(currentToken)) {
! if (delim.indexOf(tokenAccumulator)>=0) {
! if (tokenAccumulator.equals("=")){
! isDataReady=true;
! }
! }
! else {
! isDataReady=true;
! }
! }
! else isDataReady=true;
}
return tokenAccumulator;
}
--- 132,243 ----
}
if (null == element) // handle no tag contents
! putDataIntoTable(attributeTable,"",null,true);
! return attributeTable;
}
! private void processInvalidPart() {
! if (name != null) {
! if (name.equals("/")) {
! putDataIntoTable(attributeTable,Tag.EMPTYTAG,"",false);
! } else {
! putDataIntoTable(attributeTable,name,"",false);
! }
! name=null;
! value=null;
! }
! }
! private boolean isValid(String part) {
! return part != null && (0 < part.length ());
! }
! private void process(String part) {
! if (name == null) {
! if (!part.substring(0,1).equals(" ")) {
! name = part;
! equal=true;
! }
! }
! else {
! if (equal){
! if (part.equals("=")) {
! equal=false;
! delim=DELIMETERS_WITHOUT_EQUALS;
! }
! else {
! putDataIntoTable(attributeTable,name,"",false);
! name=part;
! value=null;
! }
! }
! if (!equal && !part.equals("=")) {
! value=part;
! putDataIntoTable(attributeTable,name,value,false);
! name=null;
! value=null;
! }
! }
! }
! private String getNextPartUsing(String delimiter) {
! tokenAccumulator = null;
! doubleQuote = false;
! singleQuote = false;
! ready = false;
! while (ready == false && tokenizer.hasMoreTokens()) {
! currentToken = tokenizer.nextToken(delimiter);
+ if (doubleQuote || singleQuote) {
+ combineTokensInsideSingleOrDoubleQuotes();
+ } else if (isCurrentTokenDoubleQuote()){
+ doubleQuote= true;
+ tokenAccumulator = "";
+ } else if (isCurrentTokenSingleQuote()){
+ singleQuote=true;
+ tokenAccumulator="";
+ } else {
+ tokenAccumulator = currentToken;
+ ready = isReadyWithNextPart(currentToken);
+ }
}
return tokenAccumulator;
}
+
+ private boolean isReadyWithNextPart(String currentToken) {
+ boolean ready = false;
+ if (isDelimeter(currentToken)) {
+ if (currentToken.equals("=")){
+ ready=true;
+ }
+ }
+ else {
+ ready=true;
+ }
+ return ready;
+ }
+
+ private boolean isDelimeter(String token) {
+ return delim.indexOf(tokenAccumulator)>=0;
+ }
+
+ private boolean isCurrentTokenSingleQuote() {
+ return currentToken.charAt(0)==SINGLE_QUOTE;
+ }
+
+ private boolean isCurrentTokenDoubleQuote() {
+ return currentToken.charAt(0)==DOUBLE_QUOTE;
+ }
+
+ private void combineTokensInsideSingleOrDoubleQuotes() {
+ if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){
+ doubleQuote= false;
+ ready=true;
+ } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) {
+ singleQuote=false;
+ ready=true;
+ }else {
+ tokenAccumulator += currentToken;
+ }
+ }
|
|
From: <so...@us...> - 2003-06-17 01:33:04
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv5788/src/org/htmlparser/parserHelper Modified Files: ScriptScannerHelper.java Log Message: added header Index: ScriptScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ScriptScannerHelper.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ScriptScannerHelper.java 17 Jun 2003 01:32:06 -0000 1.1 --- ScriptScannerHelper.java 17 Jun 2003 01:33:01 -0000 1.2 *************** *** 1,2 **** --- 1,29 ---- + //HTMLParser Library v1_4_20030601 - A java-based parser for HTML + //Copyright (C) Dec 31, 2000 Somik Raha + // + //This library is free software; you can redistribute it and/or + //modify it under the terms of the GNU Lesser General Public + //License as published by the Free Software Foundation; either + //version 2.1 of the License, or (at your option) any later version. + // + //This library is distributed in the hope that it will be useful, + //but WITHOUT ANY WARRANTY; without even the implied warranty of + //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + //Lesser General Public License for more details. + // + //You should have received a copy of the GNU Lesser General Public + //License along with this library; if not, write to the Free Software + //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + // + //For any questions or suggestions, you can write to me at : + //Email :so...@in... + // + //Postal Address : + //Somik Raha + //Extreme Programmer & Coach + //Industrial Logic, Inc. + //2583 Cedar Street, Berkeley, + //CA 94708, USA + //Website : http://www.industriallogic.com package org.htmlparser.parserHelper; |
|
From: <so...@us...> - 2003-06-17 01:32:10
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper
In directory sc8-pr-cvs1:/tmp/cvs-serv5705/src/org/htmlparser/parserHelper
Added Files:
ScriptScannerHelper.java
Log Message:
created ScriptScannerHelper to allow thread safety in ScriptScanner
--- NEW FILE: ScriptScannerHelper.java ---
package org.htmlparser.parserHelper;
import org.htmlparser.*;
import org.htmlparser.scanners.*;
import org.htmlparser.tags.*;
import org.htmlparser.tags.data.*;
import org.htmlparser.util.*;
/**
 * Carries the mutable state of a single script-tag scan on behalf of
 * {@link ScriptScanner}.
 * <p>
 * All working state (current end-tag location, collected script text, and so
 * on) lives in instance fields of this helper, so a fresh helper is meant to
 * be created for every scan instead of keeping that state on the scanner.
 * A helper instance itself is not safe to share between threads.
 */
public class ScriptScannerHelper {
    /** Index of the candidate end tag in the line being examined, or -1. */
    private int endTagLoc;
    /** The end tag found in the input, or the synthetic one created when none exists. */
    private Tag endTag;
    /** The opening script tag; set from {@link #tag} when scanning begins. */
    private Tag startTag;
    /** Index in the current line from which the end-tag search starts. */
    private int startingPos;
    /** True only while the line that contains the start tag is being processed. */
    private boolean sameLine;
    private boolean endTagFound;
    private NodeReader reader;
    /** Script text collected between the start tag and the end tag. */
    private StringBuffer scriptContents;
    /** Scanner that supplies the end-tag text and builds the resulting tag. */
    private ScriptScanner scriptScanner;
    private Tag tag;
    private String url;
    private String currLine;

    /**
     * Creates a helper for one scan.
     *
     * @param tag the opening script tag that triggered the scan
     * @param url the url handed through to the created tag data
     * @param nodeReader the reader further lines are pulled from
     * @param currLine the line that contains the opening tag
     * @param scriptScanner the scanner providing <code>getEndTag()</code> and <code>createTag()</code>
     */
    public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) {
        this.reader = nodeReader;
        this.scriptScanner = scriptScanner;
        this.tag = tag;
        this.url = url;
        this.currLine = currLine;
    }

    /**
     * Collects the script contents up to the end tag and builds the script tag.
     * If the input runs out before an end tag is seen, a synthetic end tag is
     * created so that a tag can still be returned.
     *
     * @return the tag produced by the scanner's <code>createTag()</code>
     * @throws ParserException if extracting the script text fails
     */
    public Tag scan() throws ParserException {
        // Remember the line number before further lines are consumed.
        int startLine = reader.getLastLineNumber();
        startTag = tag;
        extractScriptTagFrom(currLine);
        if (isScriptEndTagNotFound()) {
            createScriptEndTag(tag, currLine);
        }
        return createScriptTagUsing(url, currLine, startLine);
    }

    /** Assembles the tag data and composite data and delegates creation to the scanner. */
    private Tag createScriptTagUsing(String url, String currLine, int startLine) {
        TagData tagData = new TagData(
            startTag.elementBegin(),
            endTag.elementEnd(),
            startLine,
            reader.getLastLineNumber(),
            startTag.getText(),
            currLine,
            url,
            false
        );
        CompositeTagData compositeTagData =
            new CompositeTagData(startTag, endTag, createChildrenNodeList());
        return scriptScanner.createTag(tagData, compositeTagData);
    }

    /** Wraps the collected script text in a single string node child. */
    private NodeList createChildrenNodeList() {
        NodeList childrenNodeList = new NodeList();
        childrenNodeList.add(
            new StringNode(
                scriptContents,
                startTag.elementEnd(),
                endTag.elementBegin() - 1
            )
        );
        return childrenNodeList;
    }

    /** Creates a synthetic end tag for input in which no end tag was found. */
    private void createScriptEndTag(Tag tag, String currLine) {
        String endTagName = tag.getTagName();
        int endTagBegin = reader.getLastReadPosition() + 1;
        // NOTE(review): the +2 presumably positions the end on the closing
        // '>' of "</NAME>" - confirm against EndTag/TagData.
        int endTagEnd = endTagBegin + endTagName.length() + 2;
        endTag = new EndTag(
            new TagData(
                endTagBegin,
                endTagEnd,
                endTagName,
                currLine
            )
        );
    }

    private boolean isScriptEndTagNotFound() {
        return endTag == null;
    }

    /**
     * Walks the input line by line, starting with the line holding the start
     * tag, until an end tag is found or the reader returns no further line.
     */
    private void extractScriptTagFrom(String currLine) throws ParserException {
        scriptContents = new StringBuffer();
        endTagFound = false;
        endTag = null;
        sameLine = true;
        startingPos = startTag.elementEnd();
        String line = currLine;
        do {
            doExtractionOfScriptContentsFrom(line);
            if (!endTagFound) {
                line = reader.getNextLine();
                // Continuation lines are searched from their first character.
                startingPos = 0;
            }
            sameLine = false;
        }
        while (line != null && !endTagFound);
    }

    /** Processes one line: either closes the script at its end tag or appends the whole line. */
    private void doExtractionOfScriptContentsFrom(String line) throws ParserException {
        endTagLoc = indexOfEndTag(line, startingPos);
        findStartingAndEndingLocations(line);
        if (endTagLoc != -1) {
            extractEndTagFrom(line);
        } else {
            continueParsing(line);
        }
    }

    /** Appends a line that contains no end tag to the script contents. */
    private void continueParsing(String line) {
        if (sameLine) {
            // First line: keep only what follows the start tag.
            scriptContents.append(line.substring(startTag.elementEnd() + 1));
        } else {
            scriptContents.append(Parser.getLineSeparator());
            scriptContents.append(line);
        }
    }

    /** Records the end tag found at {@link #endTagLoc} and appends the text before it. */
    private void extractEndTagFrom(String line) throws ParserException {
        endTagFound = true;
        endTag = (EndTag)EndTag.find(line, endTagLoc);
        if (sameLine) {
            scriptContents.append(
                getCodeBetweenStartAndEndTags(line, startTag, endTagLoc));
        } else {
            scriptContents.append(Parser.getLineSeparator());
            scriptContents.append(line.substring(0, endTagLoc));
        }
        // Let the reader resume right after the end tag.
        reader.setPosInLine(endTag.elementEnd());
    }

    /**
     * Skips end-tag candidates that are immediately followed by a quote
     * character, advancing to the next candidate in the line.
     */
    private void findStartingAndEndingLocations(String line) {
        // Compare against -1 rather than 0: on continuation lines the search
        // starts at column 0, so a candidate at index 0 must be checked too.
        while (endTagLoc != -1 && isThisEndTagLocationFalseMatch(line, endTagLoc)) {
            startingPos = endTagLoc + scriptScanner.getEndTag().length();
            endTagLoc = indexOfEndTag(line, startingPos);
        }
    }

    /**
     * Finds the scanner's end tag in <code>line</code>, ignoring case.
     * <p>
     * The search runs on the original line instead of an upper-cased copy:
     * upper-casing depends on the default locale and may change the length of
     * the string, which would make the returned index unusable for
     * <code>substring</code> on the original line.
     *
     * @param line the line to search
     * @param fromIndex index to start searching at; negative values are treated as 0
     * @return index of the first match at or after <code>fromIndex</code>, or -1
     */
    private int indexOfEndTag(String line, int fromIndex) {
        String endTagText = scriptScanner.getEndTag();
        int endTagLength = endTagText.length();
        int lastCandidate = line.length() - endTagLength;
        for (int i = Math.max(fromIndex, 0); i <= lastCandidate; i++) {
            if (line.regionMatches(true, i, endTagText, 0, endTagLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the part of <code>line</code> between the end of the start tag
     * and the beginning of the end tag.
     *
     * @param line the line containing both tags
     * @param startTag the opening tag
     * @param endTagLoc index at which the end tag begins
     * @return the text between the two tags
     * @throws ParserException if the substring cannot be taken; the message
     *         reports both indices and the original exception is chained
     */
    public String getCodeBetweenStartAndEndTags(
        String line,
        Tag startTag,
        int endTagLoc) throws ParserException {
        try {
            return line.substring(
                startTag.elementEnd() + 1,
                endTagLoc
            );
        }
        catch (Exception e) {
            StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n");
            msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n");
            msg.append("substring ends at: "+(endTagLoc));
            throw new ParserException(msg.toString(),e);
        }
    }

    /**
     * Tells whether the end-tag candidate at <code>endTagLoc</code> is
     * directly followed by a single or double quote, in which case it is
     * treated as part of a string literal rather than a real end tag.
     */
    private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) {
        int indexAfterEndTag = endTagLoc + scriptScanner.getEndTag().length();
        // Nothing follows the candidate on this line, so it cannot be quoted.
        if (indexAfterEndTag > line.length() - 1) return false;
        char charAfterSuspectedEndTag = line.charAt(indexAfterEndTag);
        return charAfterSuspectedEndTag == '"' || charAfterSuspectedEndTag == '\'';
    }
}
|
|
From: <so...@us...> - 2003-06-17 01:32:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv5705/src/org/htmlparser/scanners
Modified Files:
ScriptScanner.java
Log Message:
created ScriptScannerHelper to allow thread safety in ScriptScanner
Index: ScriptScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** ScriptScanner.java 17 Jun 2003 00:43:56 -0000 1.27
--- ScriptScanner.java 17 Jun 2003 01:32:06 -0000 1.28
***************
*** 32,35 ****
--- 32,36 ----
/////////////////////////
import org.htmlparser.*;
+ import org.htmlparser.parserHelper.*;
import org.htmlparser.tags.*;
import org.htmlparser.tags.data.*;
***************
*** 43,55 ****
private static final String MATCH_NAME [] = {"SCRIPT"};
private static final String ENDERS [] = {"BODY", "HTML"};
! private int endTagLoc;
! private Tag endTag;
! private Tag startTag;
! private int startingPos;
! private boolean sameLine;
! private boolean endTagFound;
! private NodeReader reader;
! private StringBuffer scriptContents;
public ScriptScanner() {
super("",MATCH_NAME,ENDERS);
--- 44,50 ----
private static final String MATCH_NAME [] = {"SCRIPT"};
private static final String ENDERS [] = {"BODY", "HTML"};
!
!
public ScriptScanner() {
super("",MATCH_NAME,ENDERS);
***************
*** 77,88 ****
throws ParserException {
try {
! this.reader = nodeReader;
! int startLine = reader.getLastLineNumber();
! startTag = tag;
! extractScriptTagFrom(currLine);
! if (isScriptEndTagNotFound()) {
! createScriptEndTag(tag, currLine);
! }
! return createScriptTagUsing(url, currLine, startLine);
}
--- 72,78 ----
throws ParserException {
try {
! ScriptScannerHelper helper =
! new ScriptScannerHelper(tag,url,nodeReader,currLine, this);
! return helper.scan();
}
***************
*** 92,231 ****
}
- private Tag createScriptTagUsing(String url, String currLine, int startLine) {
- return createTag(
- new TagData(
- startTag.elementBegin(),
- endTag.elementEnd(),
- startLine,
- reader.getLastLineNumber(),
- startTag.getText(),
- currLine,
- url,
- false
- ), new CompositeTagData(
- startTag,endTag,createChildrenNodeList()
- )
- );
- }
-
- private NodeList createChildrenNodeList() {
- NodeList childrenNodeList = new NodeList();
- childrenNodeList.add(
- new StringNode(
- scriptContents,
- startTag.elementEnd(),
- endTag.elementBegin()-1
- )
- );
- return childrenNodeList;
- }
-
- private void createScriptEndTag(Tag tag, String currLine) {
- // If end tag doesn't exist, create one
- String endTagName = tag.getTagName();
- int endTagBegin = reader.getLastReadPosition()+1 ;
- int endTagEnd = endTagBegin + endTagName.length() + 2;
- endTag = new EndTag(
- new TagData(
- endTagBegin,
- endTagEnd,
- endTagName,
- currLine
- )
- );
- }
-
- private boolean isScriptEndTagNotFound() {
- return endTag == null;
- }
-
- private void extractScriptTagFrom(String currLine) throws ParserException {
- String line = null;
- scriptContents = new StringBuffer();
- endTagFound = false;
-
- endTag = null;
- line = currLine;
- sameLine = true;
- startingPos = startTag.elementEnd();
- do {
- doExtractionOfScriptContentsFrom(line);
- if (!endTagFound) {
- line = reader.getNextLine();
- startingPos = 0;
- }
- if (sameLine)
- sameLine = false;
- }
- while (line!=null && !endTagFound);
- }
-
- private void doExtractionOfScriptContentsFrom(String line) throws ParserException {
- endTagLoc = line.toUpperCase().indexOf(getEndTag(),startingPos);
- findStartingAndEndingLocations(line);
-
- if (endTagLoc!=-1) {
- extractEndTagFrom(line);
- } else {
- continueParsing(line);
- }
- }
-
- private void continueParsing(String line) {
- if (sameLine)
- scriptContents.append(
- line.substring(
- startTag.elementEnd()+1
- )
- );
- else {
- scriptContents.append(Parser.getLineSeparator());
- scriptContents.append(line);
- }
- }
-
- private void extractEndTagFrom(String line) throws ParserException {
- endTagFound = true;
- endTag = (EndTag)EndTag.find(line,endTagLoc);
- if (sameLine)
- scriptContents.append(
- getCodeBetweenStartAndEndTags(
- line,
- startTag,
- endTagLoc)
- );
- else {
- scriptContents.append(Parser.getLineSeparator());
- scriptContents.append(line.substring(0,endTagLoc));
- }
-
- reader.setPosInLine(endTag.elementEnd());
- }
-
- private void findStartingAndEndingLocations(String line) {
- while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) {
- startingPos = endTagLoc+getEndTag().length();
- endTagLoc = line.toUpperCase().indexOf(getEndTag(), startingPos);
- }
- }
-
- public String getCodeBetweenStartAndEndTags(
- String line,
- Tag startTag,
- int endTagLoc) throws ParserException {
- try {
-
- return line.substring(
- startTag.elementEnd()+1,
- endTagLoc
- );
- }
- catch (Exception e) {
- StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n");
- msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n");
- msg.append("substring ends at: "+(endTagLoc));
- throw new ParserException(msg.toString(),e);
- }
- }
/**
--- 82,85 ----
***************
*** 238,247 ****
}
! private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) {
! if (endTagLoc+getEndTag().length() > line.length()-1) return false;
! char charAfterSuspectedEndTag =
! line.charAt(endTagLoc+getEndTag().length());
! return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\'';
! }
}
--- 92,96 ----
}
!
}
|
|
From: <so...@us...> - 2003-06-17 00:44:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv1059/src/org/htmlparser/scanners Modified Files: ScriptScanner.java Log Message: updated scriptscanner header Index: ScriptScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** ScriptScanner.java 17 Jun 2003 00:43:09 -0000 1.26 --- ScriptScanner.java 17 Jun 2003 00:43:56 -0000 1.27 *************** *** 1,3 **** ! // HTMLParser Library v1_3_20030518 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
|
From: <so...@us...> - 2003-06-17 00:43:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv935/src/org/htmlparser/scanners
Modified Files:
ScriptScanner.java
Log Message:
fixed ScriptScanner failing tests..
Index: ScriptScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** ScriptScanner.java 1 Jun 2003 20:50:12 -0000 1.25
--- ScriptScanner.java 17 Jun 2003 00:43:09 -0000 1.26
***************
*** 1,3 ****
! // HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
--- 1,3 ----
! // HTMLParser Library v1_3_20030518 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
***************
*** 28,59 ****
package org.htmlparser.scanners;
!
! import org.htmlparser.tags.ScriptTag;
! import org.htmlparser.tags.Tag;
! import org.htmlparser.tags.data.CompositeTagData;
! import org.htmlparser.tags.data.TagData;
!
/**
* The HTMLScriptScanner identifies javascript code
*/
public class ScriptScanner extends CompositeTagScanner {
private static final String MATCH_NAME [] = {"SCRIPT"};
private static final String ENDERS [] = {"BODY", "HTML"};
public ScriptScanner() {
! this("");
}
public ScriptScanner(String filter) {
! this(filter,MATCH_NAME,ENDERS);
! }
!
! public ScriptScanner(String filter, String[] nameOfTagToMatch, String[] enders) {
! this(filter,nameOfTagToMatch,enders, new String[0], true, true);
}
! public ScriptScanner(String filter, String[] nameOfTagToMatch, String[] enders, String[] endtagenders, boolean allowSelfChildren, boolean balance_quotes) {
! super(filter,nameOfTagToMatch,enders, new String[0], allowSelfChildren, balance_quotes);
}
!
public String [] getID() {
return MATCH_NAME;
--- 28,67 ----
package org.htmlparser.scanners;
! /////////////////////////
! // HTML Parser Imports //
! /////////////////////////
! import org.htmlparser.*;
! import org.htmlparser.tags.*;
! import org.htmlparser.tags.data.*;
! import org.htmlparser.util.*;
/**
* The HTMLScriptScanner identifies javascript code
*/
+
public class ScriptScanner extends CompositeTagScanner {
+ private static final String SCRIPT_END_TAG = "</SCRIPT>";
private static final String MATCH_NAME [] = {"SCRIPT"};
private static final String ENDERS [] = {"BODY", "HTML"};
+ private int endTagLoc;
+ private Tag endTag;
+ private Tag startTag;
+ private int startingPos;
+ private boolean sameLine;
+ private boolean endTagFound;
+ private NodeReader reader;
+
+ private StringBuffer scriptContents;
public ScriptScanner() {
! super("",MATCH_NAME,ENDERS);
}
public ScriptScanner(String filter) {
! super(filter,MATCH_NAME,ENDERS);
}
! public ScriptScanner(String filter, String[] nameOfTagToMatch) {
! super(filter,nameOfTagToMatch,ENDERS);
}
!
public String [] getID() {
return MATCH_NAME;
***************
*** 65,67 ****
--- 73,247 ----
return new ScriptTag(tagData,compositeTagData);
}
+
+ public Tag scan(Tag tag, String url, NodeReader nodeReader, String currLine)
+ throws ParserException {
+ try {
+ this.reader = nodeReader;
+ int startLine = reader.getLastLineNumber();
+ startTag = tag;
+ extractScriptTagFrom(currLine);
+ if (isScriptEndTagNotFound()) {
+ createScriptEndTag(tag, currLine);
+ }
+ return createScriptTagUsing(url, currLine, startLine);
+
+ }
+ catch (Exception e) {
+ throw new ParserException("Error in ScriptScanner: ",e);
+ }
+ }
+
+ private Tag createScriptTagUsing(String url, String currLine, int startLine) {
+ return createTag(
+ new TagData(
+ startTag.elementBegin(),
+ endTag.elementEnd(),
+ startLine,
+ reader.getLastLineNumber(),
+ startTag.getText(),
+ currLine,
+ url,
+ false
+ ), new CompositeTagData(
+ startTag,endTag,createChildrenNodeList()
+ )
+ );
+ }
+
+ private NodeList createChildrenNodeList() {
+ NodeList childrenNodeList = new NodeList();
+ childrenNodeList.add(
+ new StringNode(
+ scriptContents,
+ startTag.elementEnd(),
+ endTag.elementBegin()-1
+ )
+ );
+ return childrenNodeList;
+ }
+
+ private void createScriptEndTag(Tag tag, String currLine) {
+ // If end tag doesn't exist, create one
+ String endTagName = tag.getTagName();
+ int endTagBegin = reader.getLastReadPosition()+1 ;
+ int endTagEnd = endTagBegin + endTagName.length() + 2;
+ endTag = new EndTag(
+ new TagData(
+ endTagBegin,
+ endTagEnd,
+ endTagName,
+ currLine
+ )
+ );
+ }
+
+ private boolean isScriptEndTagNotFound() {
+ return endTag == null;
+ }
+
+ private void extractScriptTagFrom(String currLine) throws ParserException {
+ String line = null;
+ scriptContents = new StringBuffer();
+ endTagFound = false;
+
+ endTag = null;
+ line = currLine;
+ sameLine = true;
+ startingPos = startTag.elementEnd();
+ do {
+ doExtractionOfScriptContentsFrom(line);
+ if (!endTagFound) {
+ line = reader.getNextLine();
+ startingPos = 0;
+ }
+ if (sameLine)
+ sameLine = false;
+ }
+ while (line!=null && !endTagFound);
+ }
+
+ private void doExtractionOfScriptContentsFrom(String line) throws ParserException {
+ endTagLoc = line.toUpperCase().indexOf(getEndTag(),startingPos);
+ findStartingAndEndingLocations(line);
+
+ if (endTagLoc!=-1) {
+ extractEndTagFrom(line);
+ } else {
+ continueParsing(line);
+ }
+ }
+
+ private void continueParsing(String line) {
+ if (sameLine)
+ scriptContents.append(
+ line.substring(
+ startTag.elementEnd()+1
+ )
+ );
+ else {
+ scriptContents.append(Parser.getLineSeparator());
+ scriptContents.append(line);
+ }
+ }
+
+ private void extractEndTagFrom(String line) throws ParserException {
+ endTagFound = true;
+ endTag = (EndTag)EndTag.find(line,endTagLoc);
+ if (sameLine)
+ scriptContents.append(
+ getCodeBetweenStartAndEndTags(
+ line,
+ startTag,
+ endTagLoc)
+ );
+ else {
+ scriptContents.append(Parser.getLineSeparator());
+ scriptContents.append(line.substring(0,endTagLoc));
+ }
+
+ reader.setPosInLine(endTag.elementEnd());
+ }
+
+ private void findStartingAndEndingLocations(String line) {
+ while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) {
+ startingPos = endTagLoc+getEndTag().length();
+ endTagLoc = line.toUpperCase().indexOf(getEndTag(), startingPos);
+ }
+ }
+
+ public String getCodeBetweenStartAndEndTags(
+ String line,
+ Tag startTag,
+ int endTagLoc) throws ParserException {
+ try {
+
+ return line.substring(
+ startTag.elementEnd()+1,
+ endTagLoc
+ );
+ }
+ catch (Exception e) {
+ StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n");
+ msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n");
+ msg.append("substring ends at: "+(endTagLoc));
+ throw new ParserException(msg.toString(),e);
+ }
+ }
+
+ /**
+ * Gets the end tag that the scanner uses to stop scanning. Subclasses of
+ * <code>ScriptScanner</code> you should override this method.
+ * @return String containing the end tag to search for, i.e. </SCRIPT>
+ */
+ public String getEndTag() {
+ return SCRIPT_END_TAG;
+ }
+
+ private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) {
+ if (endTagLoc+getEndTag().length() > line.length()-1) return false;
+ char charAfterSuspectedEndTag =
+ line.charAt(endTagLoc+getEndTag().length());
+ return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\'';
+ }
+
}
|
|
From: <so...@us...> - 2003-06-17 00:43:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests
In directory sc8-pr-cvs1:/tmp/cvs-serv935/src/org/htmlparser/tests/scannersTests
Modified Files:
ScriptScannerTest.java
Log Message:
fixed ScriptScanner failing tests..
Index: ScriptScannerTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** ScriptScannerTest.java 1 Jun 2003 20:50:16 -0000 1.27
--- ScriptScannerTest.java 17 Jun 2003 00:43:08 -0000 1.28
***************
*** 117,121 ****
StringBuffer sb2 = new StringBuffer();
! sb2.append("if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n");
sb2.append(" document.write ('xxx');\r\n");
sb2.append("else\r\n");
--- 117,121 ----
StringBuffer sb2 = new StringBuffer();
! sb2.append("\r\nif(navigator.appName.indexOf(\"Netscape\") != -1)\r\n");
sb2.append(" document.write ('xxx');\r\n");
sb2.append("else\r\n");
***************
*** 172,176 ****
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! String expectedCode = "<!--\r\n"+
" function validateForm()\r\n"+
" {\r\n"+
--- 172,176 ----
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! String expectedCode = "\r\n<!--\r\n"+
" function validateForm()\r\n"+
" {\r\n"+
***************
*** 180,184 ****
" return true;\r\n"+
" }\r\n"+
! "// -->";
assertStringEquals("Expected Code",expectedCode,scriptCode);
}
--- 180,184 ----
" return true;\r\n"+
" }\r\n"+
! "// -->\r\n";
assertStringEquals("Expected Code",expectedCode,scriptCode);
}
***************
*** 561,565 ****
public void testScanScriptWithTagsInComment() throws ParserException {
! String javascript = "// This is javascript with <li> tag in the comment\n";
createParser("<script>\n"+ javascript + "\n</script>");
parser.registerScanners();
--- 561,565 ----
public void testScanScriptWithTagsInComment() throws ParserException {
! String javascript = "// This is javascript with <li> tag in the comment";
createParser("<script>\n"+ javascript + "\n</script>");
parser.registerScanners();
***************
*** 568,572 ****
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! assertStringEquals("Expected Code",javascript,scriptCode);
}
--- 568,578 ----
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! String expectedCode =
! wrapLineSeperatorAround(javascript);
! assertStringEquals("Expected Code",expectedCode,scriptCode);
! }
!
! private String wrapLineSeperatorAround(String javascript) {
! return Parser.getLineSeparator()+javascript+Parser.getLineSeparator();
}
***************
*** 576,579 ****
--- 582,586 ----
"that spans multiple lines;";
createParser("<script>\n"+ javascript + "\n</script>");
+ Parser.setLineSeparator("\n");
parser.registerScanners();
parseAndAssertNodeCount(1);
***************
*** 581,585 ****
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! assertStringEquals("Expected Code",javascript,scriptCode);
}
--- 588,595 ----
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
!
! String expectedCode =
! wrapLineSeperatorAround(javascript);
! assertStringEquals("Expected Code",expectedCode,scriptCode);
}
***************
*** 593,597 ****
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! assertStringEquals("Expected Code",javascript,scriptCode);
}
--- 603,609 ----
ScriptTag scriptTag = (ScriptTag)node[0];
String scriptCode = scriptTag.getScriptCode();
! String expectedCode =
! wrapLineSeperatorAround(javascript);
! assertStringEquals("Expected Code",expectedCode,scriptCode);
}
|
|
From: <so...@us...> - 2003-06-17 00:43:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests
In directory sc8-pr-cvs1:/tmp/cvs-serv935/src/org/htmlparser/tests/parserHelperTests
Modified Files:
AttributeParserTest.java
Log Message:
fixed ScriptScanner failing tests..
Index: AttributeParserTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AttributeParserTest.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** AttributeParserTest.java 1 Jun 2003 20:50:14 -0000 1.31
--- AttributeParserTest.java 17 Jun 2003 00:43:09 -0000 1.32
***************
*** 168,173 ****
* Only perform this test if it's version 1.4 or higher.
*/
! public void testJspWithinAttributes()
! {
Parser parser;
--- 168,172 ----
* Only perform this test if it's version 1.4 or higher.
*/
! public void testJspWithinAttributes() {
Parser parser;
***************
*** 191,196 ****
* Only perform this test if it's version 1.4 or higher.
*/
! public void testScriptedTag ()
! {
Parser parser;
--- 190,194 ----
* Only perform this test if it's version 1.4 or higher.
*/
! public void testScriptedTag () {
Parser parser;
|
|
From: <jke...@us...> - 2003-06-13 20:27:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1:/tmp/cvs-serv17851/src/org/htmlparser/parserapplications Modified Files: LinkExtractor.java Robot.java Log Message: Renamed Node to AbstractNode, extracted the new interface, Node, and moved line separator code from AbstractNode to Parser. Index: LinkExtractor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** LinkExtractor.java 1 Jun 2003 20:50:11 -0000 1.33 --- LinkExtractor.java 13 Jun 2003 20:27:04 -0000 1.34 *************** *** 30,34 **** package org.htmlparser.parserapplications; - import org.htmlparser.Node; import org.htmlparser.Parser; --- 30,33 ---- Index: Robot.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/Robot.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** Robot.java 1 Jun 2003 20:50:11 -0000 1.37 --- Robot.java 13 Jun 2003 20:27:04 -0000 1.38 *************** *** 28,31 **** --- 28,32 ---- package org.htmlparser.parserapplications; + import org.htmlparser.Node; import org.htmlparser.Parser; |
|
From: <jke...@us...> - 2003-06-13 20:27:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv17851/src/org/htmlparser/scanners
Modified Files:
TagScanner.java
Log Message:
Renamed Node to AbstractNode, extracted the new interface, Node, and moved line separator code from AbstractNode to Parser.
Index: TagScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** TagScanner.java 1 Jun 2003 20:50:12 -0000 1.26
--- TagScanner.java 13 Jun 2003 20:27:05 -0000 1.27
***************
*** 35,38 ****
--- 35,39 ----
import java.util.Map;
+ import org.htmlparser.AbstractNode;
import org.htmlparser.Node;
import org.htmlparser.NodeReader;
***************
*** 264,268 ****
* Insert an EndTag in the currentLine, just before the occurence of the provided tag
*/
! public String insertEndTagBeforeNode(Node node, String currentLine) {
String newLine = currentLine.substring(0,node.elementBegin());
newLine += "</A>";
--- 265,269 ----
* Insert an EndTag in the currentLine, just before the occurence of the provided tag
*/
! public String insertEndTagBeforeNode(AbstractNode node, String currentLine) {
String newLine = currentLine.substring(0,node.elementBegin());
newLine += "</A>";
|
|
From: <jke...@us...> - 2003-06-13 20:27:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1:/tmp/cvs-serv17851/src/org/htmlparser/tags
Modified Files:
OptionTag.java EndTag.java Tag.java CompositeTag.java
Log Message:
Renamed Node to AbstractNode, extracted the new interface, Node, and moved line separator code from AbstractNode to Parser.
Index: OptionTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/OptionTag.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** OptionTag.java 1 Jun 2003 20:50:13 -0000 1.19
--- OptionTag.java 13 Jun 2003 20:27:05 -0000 1.20
***************
*** 31,35 ****
import org.htmlparser.tags.data.CompositeTagData;
import org.htmlparser.tags.data.TagData;
- import org.htmlparser.util.ParserUtils;
public class OptionTag extends CompositeTag
--- 31,34 ----
Index: EndTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/EndTag.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** EndTag.java 1 Jun 2003 20:50:13 -0000 1.19
--- EndTag.java 13 Jun 2003 20:27:05 -0000 1.20
***************
*** 29,33 ****
package org.htmlparser.tags;
! import org.htmlparser.Node;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.visitors.NodeVisitor;
--- 29,33 ----
package org.htmlparser.tags;
! import org.htmlparser.AbstractNode;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.visitors.NodeVisitor;
***************
*** 58,62 ****
* @param position Position to start parsing from
*/
! public static Node find(String input,int position)
{
int state = ENDTAG_BEFORE_PARSING_STATE;
--- 58,62 ----
* @param position Position to start parsing from
*/
! public static AbstractNode find(String input,int position)
{
int state = ENDTAG_BEFORE_PARSING_STATE;
Index: Tag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** Tag.java 1 Jun 2003 20:50:13 -0000 1.30
--- Tag.java 13 Jun 2003 20:27:05 -0000 1.31
***************
*** 34,38 ****
import java.util.Map;
! import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.parserHelper.AttributeParser;
--- 34,38 ----
import java.util.Map;
! import org.htmlparser.AbstractNode;
import org.htmlparser.NodeReader;
import org.htmlparser.parserHelper.AttributeParser;
***************
*** 50,54 ****
* @author Kaarle Kaila 23.10.2001
*/
! public class Tag extends Node
{
public static final String TYPE = "TAG";
--- 50,54 ----
* @author Kaarle Kaila 23.10.2001
*/
! public class Tag extends AbstractNode
{
public static final String TYPE = "TAG";
***************
*** 291,300 ****
* @param reader The NodeReader that is to be used for reading the url
*/
! public Node scan(Map scanners,String url,NodeReader reader) throws ParserException
{
if (tagContents.length()==0) return this;
try {
boolean found=false;
! Node retVal=null;
// Find the first word in the scanners
String firstWord = extractWord(tagContents.toString());
--- 291,300 ----
* @param reader The NodeReader that is to be used for reading the url
*/
! public AbstractNode scan(Map scanners,String url,NodeReader reader) throws ParserException
{
if (tagContents.length()==0) return this;
try {
boolean found=false;
! AbstractNode retVal=null;
// Find the first word in the scanners
String firstWord = extractWord(tagContents.toString());
Index: CompositeTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** CompositeTag.java 1 Jun 2003 20:50:13 -0000 1.42
--- CompositeTag.java 13 Jun 2003 20:27:05 -0000 1.43
***************
*** 30,34 ****
import org.htmlparser.*;
! import org.htmlparser.Node;
import org.htmlparser.tags.data.CompositeTagData;
import org.htmlparser.tags.data.TagData;
--- 30,34 ----
import org.htmlparser.*;
! import org.htmlparser.AbstractNode;
import org.htmlparser.tags.data.CompositeTagData;
import org.htmlparser.tags.data.TagData;
***************
*** 93,97 ****
if (prevNode.elementEnd()>node.elementBegin()) {
// Its a new line
! sb.append(lineSeparator);
}
}
--- 93,97 ----
if (prevNode.elementEnd()>node.elementBegin()) {
// Its a new line
! sb.append(Parser.getLineSeparator());
}
}
***************
*** 100,104 ****
}
if (prevNode.elementEnd()>endTag.elementBegin()) {
! sb.append(lineSeparator);
}
}
--- 100,104 ----
}
if (prevNode.elementEnd()>endTag.elementBegin()) {
! sb.append(Parser.getLineSeparator());
}
}
***************
*** 269,273 ****
StringBuffer buff = new StringBuffer();
for (SimpleNodeIterator e = children();e.hasMoreNodes();) {
! Node node = (Node)e.nextNode();
buff.append(node.toHtml());
}
--- 269,273 ----
StringBuffer buff = new StringBuffer();
for (SimpleNodeIterator e = children();e.hasMoreNodes();) {
! AbstractNode node = (AbstractNode)e.nextNode();
buff.append(node.toHtml());
}
|
|
From: <jke...@us...> - 2003-06-13 20:27:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1:/tmp/cvs-serv17851/src/org/htmlparser/tests
Modified Files:
ParserTest.java ParserTestCase.java
Log Message:
Renamed Node to AbstractNode, extracted the new interface, Node, and moved line separator code from AbstractNode to Parser.
Index: ParserTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** ParserTest.java 1 Jun 2003 20:50:14 -0000 1.30
--- ParserTest.java 13 Jun 2003 20:27:04 -0000 1.31
***************
*** 37,40 ****
--- 37,41 ----
import java.util.Map;
+ import org.htmlparser.AbstractNode;
import org.htmlparser.Node;
import org.htmlparser.Parser;
***************
*** 90,94 ****
parser.getReader().mark(5000);
! Node [] node = new Node[500];
int i = 0;
for (NodeIterator e = parser.elements();e.hasMoreNodes();)
--- 91,95 ----
parser.getReader().mark(5000);
! Node [] node = new AbstractNode[500];
int i = 0;
for (NodeIterator e = parser.elements();e.hasMoreNodes();)
***************
*** 341,345 ****
out.close ();
parser = new Parser (file.getAbsolutePath ());
! nodes = new Node[30];
i = 0;
for (enumeration = parser.elements (); enumeration.hasMoreNodes ();)
--- 342,346 ----
out.close ();
parser = new Parser (file.getAbsolutePath ());
! nodes = new AbstractNode[30];
i = 0;
for (enumeration = parser.elements (); enumeration.hasMoreNodes ();)
***************
*** 417,421 ****
parser = new Parser(url);
i = 0;
! nodes = new Node[30];
for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
nodes[i++] = e.nextNode();
--- 418,422 ----
parser = new Parser(url);
i = 0;
! nodes = new AbstractNode[30];
for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
nodes[i++] = e.nextNode();
***************
*** 551,555 ****
parser = new Parser(url);
! Node node [] = new Node[30];
int i = 0;
for (NodeIterator e = parser.elements();e.hasMoreNodes();) {
--- 552,556 ----
parser = new Parser(url);
! Node node [] = new AbstractNode[30];
int i = 0;
for (NodeIterator e = parser.elements();e.hasMoreNodes();) {
Index: ParserTestCase.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** ParserTestCase.java 24 May 2003 02:05:50 -0000 1.14
--- ParserTestCase.java 13 Jun 2003 20:27:04 -0000 1.15
***************
*** 7,10 ****
--- 7,11 ----
import junit.framework.TestCase;
+ import org.htmlparser.AbstractNode;
import org.htmlparser.Node;
import org.htmlparser.NodeReader;
***************
*** 41,45 ****
reader = new NodeReader(new BufferedReader(sr),5000);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new Node[40];
}
--- 42,46 ----
reader = new NodeReader(new BufferedReader(sr),5000);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new AbstractNode[40];
}
***************
*** 49,53 ****
reader = new NodeReader(new BufferedReader(sr),5000);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new Node[numNodes];
}
--- 50,54 ----
reader = new NodeReader(new BufferedReader(sr),5000);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new AbstractNode[numNodes];
}
***************
*** 57,61 ****
reader = new NodeReader(new BufferedReader(sr),url);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new Node[40];
}
--- 58,62 ----
reader = new NodeReader(new BufferedReader(sr),url);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new AbstractNode[40];
}
***************
*** 65,69 ****
reader = new NodeReader(new BufferedReader(sr),url);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new Node[numNodes];
}
--- 66,70 ----
reader = new NodeReader(new BufferedReader(sr),url);
parser = new Parser(reader,new DefaultParserFeedback());
! node = new AbstractNode[numNodes];
}
|