[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper AttributeParser.java,1.25,1.26
Brought to you by:
derrickoswald
|
From: <so...@us...> - 2003-06-17 01:35:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper
In directory sc8-pr-cvs1:/tmp/cvs-serv6124/src/org/htmlparser/parserHelper
Modified Files:
AttributeParser.java
Log Message:
refactored AttributeParser to be more readable
Index: AttributeParser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** AttributeParser.java 1 Jun 2003 20:50:10 -0000 1.25
--- AttributeParser.java 17 Jun 2003 01:35:04 -0000 1.26
***************
*** 44,55 ****
*/
public class AttributeParser {
! private final String delima = " \t\r\n\f=\"'>";
! private final String delimb = " \t\r\n\f\"'>";
! private final char doubleQuote = '\"';
! private final char singleQuote = '\'';
private String delim;
/**
* Method to break the tag into pieces.
--- 44,68 ----
*/
public class AttributeParser {
! private final String DELIMETERS = " \t\r\n\f=\"'>";
! private final String DELIMETERS_WITHOUT_EQUALS = " \t\r\n\f\"'>";
! private final char DOUBLE_QUOTE = '\"';
! private final char SINGLE_QUOTE = '\'';
private String delim;
+ private Hashtable attributeTable;
+ private String element;
+ private String name;
+ private String value;
+ private String part;
+ private String empty;
+ private boolean equal;
+ private StringTokenizer tokenizer;
+ private boolean doubleQuote;
+ private boolean singleQuote;
+ private boolean ready;
+ private String currentToken;
+ private String tokenAccumulator;
/**
* Method to break the tag into pieces.
***************
*** 91,149 ****
*
*/
! public Hashtable parseAttributes(Tag tag){
! Hashtable h = new Hashtable();
! String element,name,value,nextPart=null;
! String empty=null;
name=null;
value=null;
element=null;
! boolean waitingForEqual=false;
! delim=delima;
! StringTokenizer tokenizer = new StringTokenizer(tag.getText(),delim,true);
while (true) {
! nextPart=getNextPart(tokenizer,delim);
! delim=delima;
! if (element==null && nextPart != null && !nextPart.equals("=")){
! element = nextPart;
! putDataIntoTable(h,element,null,true);
}
else {
! if (nextPart != null && (0 < nextPart.length ())) {
! if (name == null) {
! if (!nextPart.substring(0,1).equals(" ")) {
! name = nextPart;
! waitingForEqual=true;
! }
! }
! else {
! if (waitingForEqual){
! if (nextPart.equals("=")) {
! waitingForEqual=false;
! delim=delimb;
! }
! else {
! putDataIntoTable(h,name,"",false);
! name=nextPart;
! value=null;
! }
! }
! if (!waitingForEqual && !nextPart.equals("=")) {
! value=nextPart;
! putDataIntoTable(h,name,value,false);
! name=null;
! value=null;
! }
! }
}
else {
! if (name != null) {
! if (name.equals("/")) {
! putDataIntoTable(h,Tag.EMPTYTAG,"",false);
! } else {
! putDataIntoTable(h,name,"",false);
! }
! name=null;
! value=null;
! }
break;
}
--- 104,130 ----
*
*/
! public Hashtable parseAttributes(Tag tag) {
! attributeTable = new Hashtable();
! part = null;
! empty = null;
name=null;
value=null;
element=null;
! equal = false;
! delim=DELIMETERS;
! tokenizer = new StringTokenizer(tag.getText(),delim,true);
while (true) {
! part=getNextPartUsing(delim);
! delim=DELIMETERS;
! if (element==null && part != null && !part.equals("=")){
! element = part;
! putDataIntoTable(attributeTable,element,null,true);
}
else {
! if (isValid(part)) {
! process(part);
}
else {
! processInvalidPart();
break;
}
***************
*** 151,207 ****
}
if (null == element) // handle no tag contents
! putDataIntoTable(h,"",null,true);
! return h;
}
! private String getNextPart(StringTokenizer tokenizer,String deli){
! String tokenAccumulator=null;
! boolean isDoubleQuote=false;
! boolean isSingleQuote=false;
! boolean isDataReady=false;
! String currentToken;
! while (isDataReady == false && tokenizer.hasMoreTokens()) {
! currentToken = tokenizer.nextToken(deli);
! //
! // First let's combine tokens that are inside "" or ''
! //
! if (isDoubleQuote || isSingleQuote) {
! if (isDoubleQuote && currentToken.charAt(0)==doubleQuote){
! isDoubleQuote= false;
! isDataReady=true;
! } else if (isSingleQuote && currentToken.charAt(0)==singleQuote) {
! isSingleQuote=false;
! isDataReady=true;
! }else {
! tokenAccumulator += currentToken;
! continue;
! }
! } else if (currentToken.charAt(0)==doubleQuote){
! isDoubleQuote= true;
! tokenAccumulator = "";
! continue;
! } else if (currentToken.charAt(0)==singleQuote){
! isSingleQuote=true;
! tokenAccumulator="";
! continue;
! } else tokenAccumulator = currentToken;
! if (tokenAccumulator.equals(currentToken)) {
! if (delim.indexOf(tokenAccumulator)>=0) {
! if (tokenAccumulator.equals("=")){
! isDataReady=true;
! }
! }
! else {
! isDataReady=true;
! }
! }
! else isDataReady=true;
}
return tokenAccumulator;
}
--- 132,243 ----
}
if (null == element) // handle no tag contents
! putDataIntoTable(attributeTable,"",null,true);
! return attributeTable;
}
! private void processInvalidPart() {
! if (name != null) {
! if (name.equals("/")) {
! putDataIntoTable(attributeTable,Tag.EMPTYTAG,"",false);
! } else {
! putDataIntoTable(attributeTable,name,"",false);
! }
! name=null;
! value=null;
! }
! }
! private boolean isValid(String part) {
! return part != null && (0 < part.length ());
! }
! private void process(String part) {
! if (name == null) {
! if (!part.substring(0,1).equals(" ")) {
! name = part;
! equal=true;
! }
! }
! else {
! if (equal){
! if (part.equals("=")) {
! equal=false;
! delim=DELIMETERS_WITHOUT_EQUALS;
! }
! else {
! putDataIntoTable(attributeTable,name,"",false);
! name=part;
! value=null;
! }
! }
! if (!equal && !part.equals("=")) {
! value=part;
! putDataIntoTable(attributeTable,name,value,false);
! name=null;
! value=null;
! }
! }
! }
! private String getNextPartUsing(String delimiter) {
! tokenAccumulator = null;
! doubleQuote = false;
! singleQuote = false;
! ready = false;
! while (ready == false && tokenizer.hasMoreTokens()) {
! currentToken = tokenizer.nextToken(delimiter);
+ if (doubleQuote || singleQuote) {
+ combineTokensInsideSingleOrDoubleQuotes();
+ } else if (isCurrentTokenDoubleQuote()){
+ doubleQuote= true;
+ tokenAccumulator = "";
+ } else if (isCurrentTokenSingleQuote()){
+ singleQuote=true;
+ tokenAccumulator="";
+ } else {
+ tokenAccumulator = currentToken;
+ ready = isReadyWithNextPart(currentToken);
+ }
}
return tokenAccumulator;
}
+
+ private boolean isReadyWithNextPart(String currentToken) {
+ boolean ready = false;
+ if (isDelimeter(currentToken)) {
+ if (currentToken.equals("=")){
+ ready=true;
+ }
+ }
+ else {
+ ready=true;
+ }
+ return ready;
+ }
+
+ private boolean isDelimeter(String token) {
+ return delim.indexOf(tokenAccumulator)>=0;
+ }
+
+ private boolean isCurrentTokenSingleQuote() {
+ return currentToken.charAt(0)==SINGLE_QUOTE;
+ }
+
+ private boolean isCurrentTokenDoubleQuote() {
+ return currentToken.charAt(0)==DOUBLE_QUOTE;
+ }
+
+ private void combineTokensInsideSingleOrDoubleQuotes() {
+ if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){
+ doubleQuote= false;
+ ready=true;
+ } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) {
+ singleQuote=false;
+ ready=true;
+ }else {
+ tokenAccumulator += currentToken;
+ }
+ }
|