[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners LinkScanner.java,1.38,1.39 TagScanner.java,1.21,1.22
Brought to you by:
derrickoswald
|
From: <der...@us...> - 2003-05-17 12:12:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv30035/org/htmlparser/scanners
Modified Files:
LinkScanner.java TagScanner.java
Log Message:
Fix tab handling on the suggestion of oyoaha (philippe blanc).
Rewrite some string handling methods to remove gross inefficiencies.
Index: LinkScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -d -r1.38 -r1.39
*** LinkScanner.java 12 May 2003 01:37:45 -0000 1.38
--- LinkScanner.java 17 May 2003 12:12:49 -0000 1.39
***************
*** 118,140 ****
* scan has begun, and hence allows us to write scanners that can work with dirty html
*/
! public boolean evaluate(String s,TagScanner previousOpenScanner)
! {
! // Eat up leading blanks
! s = absorbLeadingBlanks(s);
! boolean retVal;
! char ch = s.charAt(0);
!
! if (s.length()<5) retVal = false; else
! if ((ch=='a' || ch=='A') && (s.charAt(1)==' ' || s.charAt(1)=='\n' || s.charAt(1)=='\r')) retVal = true; else retVal = false;
! if (retVal)
! {
! if (s.toUpperCase().indexOf("HREF")==-1)
! retVal=false;
! }
- return retVal;
-
- }
/**
* Extract the link from the given string. The URL of the actual html page is also
--- 118,142 ----
* scan has begun, and hence allows us to write scanners that can work with dirty html
*/
! public boolean evaluate (String s, TagScanner previousOpenScanner)
! {
! char ch;
! boolean ret;
!
! // eat up leading blanks
! s = absorbLeadingBlanks (s);
! if (5 > s.length ())
! ret = false;
! else
! {
! ch = s.charAt (0);
! if ((ch=='a' || ch=='A') && Character.isWhitespace (s.charAt (1)))
! ret = -1 != s.toUpperCase().indexOf ("HREF");
! else
! ret = false;
! }
! return (ret);
! }
/**
* Extract the link from the given string. The URL of the actual html page is also
Index: TagScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** TagScanner.java 12 May 2003 01:37:45 -0000 1.21
--- TagScanner.java 17 May 2003 12:12:49 -0000 1.22
***************
*** 103,119 ****
/**
! * Insert the method's description here.
! * Creation date: (6/18/2001 2:15:02 AM)
! * @return java.lang.String
*/
! public static String absorbLeadingBlanks(String s)
! {
! String temp = new String(s);
! while (temp.length()!=0 && temp.charAt(0)==' ')
! {
! temp = temp.substring(1,temp.length());
! }
! return temp;
! }
/**
--- 103,129 ----
/**
! * Remove whitespace from the front of the given string.
! * @param s The string to trim.
! * @return Either the same string or a string with whitespace chopped off.
*/
! public static String absorbLeadingBlanks (String s)
! {
! int length;
! int i;
! String ret;
!
! i = 0;
! length = s.length ();
! while (i < length && Character.isWhitespace (s.charAt (i)))
! i++;
! if (0 == i)
! ret = s;
! else if (length == i)
! ret = "";
! else
! ret = s.substring (i);
!
! return (ret);
! }
/**
|