[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserHelper AttributeParser.java,1.25,1.26
Brought to you by:
derrickoswald
From: <so...@us...> - 2003-06-17 01:35:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv6124/src/org/htmlparser/parserHelper Modified Files: AttributeParser.java Log Message: refactored AttributeParser to be more readable Index: AttributeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** AttributeParser.java 1 Jun 2003 20:50:10 -0000 1.25 --- AttributeParser.java 17 Jun 2003 01:35:04 -0000 1.26 *************** *** 44,55 **** */ public class AttributeParser { ! private final String delima = " \t\r\n\f=\"'>"; ! private final String delimb = " \t\r\n\f\"'>"; ! private final char doubleQuote = '\"'; ! private final char singleQuote = '\''; private String delim; /** * Method to break the tag into pieces. --- 44,68 ---- */ public class AttributeParser { ! private final String DELIMETERS = " \t\r\n\f=\"'>"; ! private final String DELIMETERS_WITHOUT_EQUALS = " \t\r\n\f\"'>"; ! private final char DOUBLE_QUOTE = '\"'; ! private final char SINGLE_QUOTE = '\''; private String delim; + private Hashtable attributeTable; + private String element; + private String name; + private String value; + private String part; + private String empty; + private boolean equal; + private StringTokenizer tokenizer; + private boolean doubleQuote; + private boolean singleQuote; + private boolean ready; + private String currentToken; + private String tokenAccumulator; /** * Method to break the tag into pieces. *************** *** 91,149 **** * */ ! public Hashtable parseAttributes(Tag tag){ ! Hashtable h = new Hashtable(); ! String element,name,value,nextPart=null; ! String empty=null; name=null; value=null; element=null; ! boolean waitingForEqual=false; ! delim=delima; ! StringTokenizer tokenizer = new StringTokenizer(tag.getText(),delim,true); while (true) { ! nextPart=getNextPart(tokenizer,delim); ! delim=delima; ! if (element==null && nextPart != null && !nextPart.equals("=")){ ! element = nextPart; ! putDataIntoTable(h,element,null,true); } else { ! if (nextPart != null && (0 < nextPart.length ())) { ! if (name == null) { ! if (!nextPart.substring(0,1).equals(" ")) { ! name = nextPart; ! waitingForEqual=true; ! } ! } ! else { ! if (waitingForEqual){ ! if (nextPart.equals("=")) { ! waitingForEqual=false; ! delim=delimb; ! } ! else { ! putDataIntoTable(h,name,"",false); ! name=nextPart; ! value=null; ! } ! } ! if (!waitingForEqual && !nextPart.equals("=")) { ! value=nextPart; ! putDataIntoTable(h,name,value,false); ! name=null; ! value=null; ! } ! } } else { ! if (name != null) { ! if (name.equals("/")) { ! putDataIntoTable(h,Tag.EMPTYTAG,"",false); ! } else { ! putDataIntoTable(h,name,"",false); ! } ! name=null; ! value=null; ! } break; } --- 104,130 ---- * */ ! public Hashtable parseAttributes(Tag tag) { ! attributeTable = new Hashtable(); ! part = null; ! empty = null; name=null; value=null; element=null; ! equal = false; ! delim=DELIMETERS; ! tokenizer = new StringTokenizer(tag.getText(),delim,true); while (true) { ! part=getNextPartUsing(delim); ! delim=DELIMETERS; ! if (element==null && part != null && !part.equals("=")){ ! element = part; ! putDataIntoTable(attributeTable,element,null,true); } else { ! if (isValid(part)) { ! process(part); } else { ! processInvalidPart(); break; } *************** *** 151,207 **** } if (null == element) // handle no tag contents ! putDataIntoTable(h,"",null,true); ! return h; } ! private String getNextPart(StringTokenizer tokenizer,String deli){ ! String tokenAccumulator=null; ! boolean isDoubleQuote=false; ! boolean isSingleQuote=false; ! boolean isDataReady=false; ! String currentToken; ! while (isDataReady == false && tokenizer.hasMoreTokens()) { ! currentToken = tokenizer.nextToken(deli); ! // ! // First let's combine tokens that are inside "" or '' ! // ! if (isDoubleQuote || isSingleQuote) { ! if (isDoubleQuote && currentToken.charAt(0)==doubleQuote){ ! isDoubleQuote= false; ! isDataReady=true; ! } else if (isSingleQuote && currentToken.charAt(0)==singleQuote) { ! isSingleQuote=false; ! isDataReady=true; ! }else { ! tokenAccumulator += currentToken; ! continue; ! } ! } else if (currentToken.charAt(0)==doubleQuote){ ! isDoubleQuote= true; ! tokenAccumulator = ""; ! continue; ! } else if (currentToken.charAt(0)==singleQuote){ ! isSingleQuote=true; ! tokenAccumulator=""; ! continue; ! } else tokenAccumulator = currentToken; ! if (tokenAccumulator.equals(currentToken)) { ! if (delim.indexOf(tokenAccumulator)>=0) { ! if (tokenAccumulator.equals("=")){ ! isDataReady=true; ! } ! } ! else { ! isDataReady=true; ! } ! } ! else isDataReady=true; } return tokenAccumulator; } --- 132,243 ---- } if (null == element) // handle no tag contents ! putDataIntoTable(attributeTable,"",null,true); ! return attributeTable; } ! private void processInvalidPart() { ! if (name != null) { ! if (name.equals("/")) { ! putDataIntoTable(attributeTable,Tag.EMPTYTAG,"",false); ! } else { ! putDataIntoTable(attributeTable,name,"",false); ! } ! name=null; ! value=null; ! } ! } ! private boolean isValid(String part) { ! return part != null && (0 < part.length ()); ! } ! private void process(String part) { ! if (name == null) { ! if (!part.substring(0,1).equals(" ")) { ! name = part; ! equal=true; ! } ! } ! else { ! if (equal){ ! if (part.equals("=")) { ! equal=false; ! delim=DELIMETERS_WITHOUT_EQUALS; ! } ! else { ! putDataIntoTable(attributeTable,name,"",false); ! name=part; ! value=null; ! } ! } ! if (!equal && !part.equals("=")) { ! value=part; ! putDataIntoTable(attributeTable,name,value,false); ! name=null; ! value=null; ! } ! } ! } ! private String getNextPartUsing(String delimiter) { ! tokenAccumulator = null; ! doubleQuote = false; ! singleQuote = false; ! ready = false; ! while (ready == false && tokenizer.hasMoreTokens()) { ! currentToken = tokenizer.nextToken(delimiter); + if (doubleQuote || singleQuote) { + combineTokensInsideSingleOrDoubleQuotes(); + } else if (isCurrentTokenDoubleQuote()){ + doubleQuote= true; + tokenAccumulator = ""; + } else if (isCurrentTokenSingleQuote()){ + singleQuote=true; + tokenAccumulator=""; + } else { + tokenAccumulator = currentToken; + ready = isReadyWithNextPart(currentToken); + } } return tokenAccumulator; } + + private boolean isReadyWithNextPart(String currentToken) { + boolean ready = false; + if (isDelimeter(currentToken)) { + if (currentToken.equals("=")){ + ready=true; + } + } + else { + ready=true; + } + return ready; + } + + private boolean isDelimeter(String token) { + return delim.indexOf(tokenAccumulator)>=0; + } + + private boolean isCurrentTokenSingleQuote() { + return currentToken.charAt(0)==SINGLE_QUOTE; + } + + private boolean isCurrentTokenDoubleQuote() { + return currentToken.charAt(0)==DOUBLE_QUOTE; + } + + private void combineTokensInsideSingleOrDoubleQuotes() { + if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){ + doubleQuote= false; + ready=true; + } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) { + singleQuote=false; + ready=true; + }else { + tokenAccumulator += currentToken; + } + } |