[Htmlparser-cvs] htmlparser/src/org/htmlparser StringNodeFactory.java,NONE,1.1 Parser.java,1.49,1.50
Brought to you by:
derrickoswald
|
From: <jke...@us...> - 2003-07-12 00:34:02
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1:/tmp/cvs-serv15024/src/org/htmlparser
Modified Files:
Parser.java NodeReader.java StringNode.java
Added Files:
StringNodeFactory.java
Log Message:
added more support for string node factory, fixed an error in the NodeArray class
--- NEW FILE: StringNodeFactory.java ---
package org.htmlparser;
import java.io.Serializable;
import org.htmlparser.nodeDecorators.DecodingNode;
import org.htmlparser.nodeDecorators.EscapeCharacterRemovingNode;
import org.htmlparser.nodeDecorators.NonBreakingSpaceConvertingNode;
/**
 * Builds string nodes for the parser and, depending on three independent
 * boolean options, wraps each new node in decorator nodes.
 *
 * <p>All options are off by default, in which case {@link #createStringNode}
 * returns a plain {@code StringNode}.
 */
public class StringNodeFactory implements Serializable {

    /**
     * When true, new string nodes are wrapped in a {@code DecodingNode} so that
     * the text returned by the node's toPlainTextString is decoded
     * (via org.htmlparser.util.Translate.decode()).
     */
    private boolean shouldDecodeNodes;

    /**
     * When true, new string nodes are wrapped in an
     * {@code EscapeCharacterRemovingNode} so that escape characters such as
     * newline and tab are stripped from the text returned by toPlainTextString
     * (via org.htmlparser.util.ParserUtils.removeEscapeCharacters()).
     */
    private boolean shouldRemoveEscapeCharacters;

    /**
     * When true, new string nodes are wrapped in a
     * {@code NonBreakingSpaceConvertingNode} so that non-breaking spaces
     * (U+00A0) in the text returned by toPlainTextString become ordinary spaces.
     */
    private boolean shouldConvertNonBreakingSpace;

    /**
     * Creates a string node over the given buffer range and applies every
     * decorator whose option is currently enabled.
     *
     * <p>Decorators are layered in a fixed order: decoding is innermost, then
     * escape-character removal, then non-breaking-space conversion outermost.
     *
     * @param textBuffer the buffer passed through to the {@code StringNode} constructor
     * @param textBegin  the begin position passed through to the {@code StringNode} constructor
     * @param textEnd    the end position passed through to the {@code StringNode} constructor
     * @return the new node, wrapped in zero or more decorators
     */
    public Node createStringNode(StringBuffer textBuffer, int textBegin, int textEnd) {
        Node result = new StringNode(textBuffer, textBegin, textEnd);

        // The option accessors are called (rather than reading the fields) so
        // that a subclass overriding them still influences node creation.
        if (shouldDecodeNodes()) {
            result = new DecodingNode(result);
        }
        if (shouldRemoveEscapeCharacters()) {
            result = new EscapeCharacterRemovingNode(result);
        }
        if (shouldConvertNonBreakingSpace()) {
            result = new NonBreakingSpaceConvertingNode(result);
        }
        return result;
    }

    /**
     * Enables or disables wrapping of new nodes in a {@code DecodingNode}.
     *
     * @param shouldDecodeNodes true to decode node text, false to leave it as is
     */
    public void setNodeDecoding(boolean shouldDecodeNodes) {
        this.shouldDecodeNodes = shouldDecodeNodes;
    }

    /**
     * Reports whether new nodes are wrapped in a {@code DecodingNode}.
     *
     * @return the current value of the decoding option
     */
    public boolean shouldDecodeNodes() {
        return this.shouldDecodeNodes;
    }

    /**
     * Enables or disables wrapping of new nodes in an
     * {@code EscapeCharacterRemovingNode}.
     *
     * @param shouldRemoveEscapeCharacters true to strip escape characters from node text
     */
    public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) {
        this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters;
    }

    /**
     * Reports whether new nodes are wrapped in an
     * {@code EscapeCharacterRemovingNode}.
     *
     * @return the current value of the escape-character-removal option
     */
    public boolean shouldRemoveEscapeCharacters() {
        return this.shouldRemoveEscapeCharacters;
    }

    /**
     * Enables or disables wrapping of new nodes in a
     * {@code NonBreakingSpaceConvertingNode}.
     *
     * @param shouldConvertNonBreakSpace true to convert non-breaking spaces in node text
     */
    public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) {
        this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace;
    }

    /**
     * Reports whether new nodes are wrapped in a
     * {@code NonBreakingSpaceConvertingNode}.
     *
     * @return the current value of the non-breaking-space-conversion option
     */
    public boolean shouldConvertNonBreakingSpace() {
        return this.shouldConvertNonBreakingSpace;
    }
}
Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -C2 -d -r1.49 -r1.50
*** Parser.java 29 Jun 2003 12:29:30 -0000 1.49
--- Parser.java 12 Jul 2003 00:33:59 -0000 1.50
***************
*** 139,143 ****
// This is done so as to facilitate ant script processing.
! /**
* The floating point version number.
*/
--- 139,143 ----
// This is done so as to facilitate ant script processing.
! /**
* The floating point version number.
*/
***************
*** 182,203 ****
protected static final String CHARSET_STRING = "charset";
- /**
- * Flag to tell the parser to decode strings returned by StringNode's toPlainTextString.
- * Decoding occurs via the method, org.htmlparser.util.Translate.decode()
- */
- private boolean shouldDecodeNodes = false;
-
- /**
- * Flag to tell the parser to remove escape characters, like \n and \t, returned by StringNode's toPlainTextString.
- * Escape character removal occurs via the method, org.htmlparser.util.ParserUtils.removeEscapeCharacters()
- */
- private boolean shouldRemoveEscapeCharacters = false;
-
/**
! * Flag to tell the parser to convert non breaking space
! * (i.e. \u00a0) to a space (" "). If true, this will happen inside StringNode's toPlainTextString.
*/
! private boolean shouldConvertNonBreakingSpace = false;
/**
--- 182,191 ----
protected static final String CHARSET_STRING = "charset";
/**
! * This object is used by the StringParser to create new StringNodes at runtime, based on
! * use configurations of the factory
*/
! private StringNodeFactory stringNodeFactory;
/**
***************
*** 1227,1256 ****
}
! /**
! * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode()
! */
! public void setNodeDecoding(boolean shouldDecodeNodes) {
! this.shouldDecodeNodes = shouldDecodeNodes;
! }
!
! public boolean shouldDecodeNodes() {
! return shouldDecodeNodes;
! }
!
! public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) {
! this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters;
! }
!
! public boolean shouldRemoveEscapeCharacters() {
! return shouldRemoveEscapeCharacters;
! }
!
! public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) {
! this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace;
}
! public boolean shouldConvertNonBreakingSpace() {
! return shouldConvertNonBreakingSpace;
! }
!
}
--- 1215,1226 ----
}
! public StringNodeFactory getStringNodeFactory() {
! if (stringNodeFactory == null)
! stringNodeFactory = new StringNodeFactory();
! return stringNodeFactory;
}
! public void setStringNodeFactory(StringNodeFactory stringNodeFactory) {
! this.stringNodeFactory = stringNodeFactory;
! }
}
Index: NodeReader.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeReader.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** NodeReader.java 29 Jun 2003 12:29:30 -0000 1.35
--- NodeReader.java 12 Jul 2003 00:33:59 -0000 1.36
***************
*** 415,418 ****
this.dontReadNextLine = dontReadNextLine;
}
-
}
--- 415,417 ----
Index: StringNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -d -r1.28 -r1.29
*** StringNode.java 29 Jun 2003 12:29:30 -0000 1.28
--- StringNode.java 12 Jul 2003 00:33:59 -0000 1.29
***************
*** 30,34 ****
package org.htmlparser;
- import org.htmlparser.nodeDecorators.*;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.NodeVisitor;
--- 30,33 ----
***************
*** 57,75 ****
this.textBuffer = textBuffer;
}
-
- public static Node createStringNode(
- StringBuffer textBuffer, int textBegin, int textEnd,
- boolean shouldDecode, boolean shouldRemoveEscapeCharacters,
- boolean shouldConvertNonBlankingSpace) {
- Node newNode = new StringNode(textBuffer, textBegin, textEnd);
- if (shouldDecode)
- newNode = new DecodingNode(newNode);
- if (shouldRemoveEscapeCharacters)
- newNode = new EscapeCharacterRemovingNode(newNode);
- if (shouldConvertNonBlankingSpace)
- newNode = new NonBreakingSpaceConvertingNode(newNode);
- return newNode;
- }
-
/**
--- 56,59 ----
|