Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina
In directory sc8-pr-cvs1:/tmp/cvs-serv25697/lexerapplications/thumbelina
Modified Files:
Thumbelina.java
Log Message:
Made visiting order the same order as on the page.
The 'shouldRecurseSelf' boolean of NodeVisitor could probably
be removed since it doesn't make much sense any more.
Fixed StringBean, which was still looking for end tags with names starting with
a slash, e.g. "/SCRIPT", silly beany.
Added some debugging support to the lexer; you can now easily set a breakpoint
based on line number.
Index: Thumbelina.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/Thumbelina.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** Thumbelina.java 26 Oct 2003 16:44:01 -0000 1.2
--- Thumbelina.java 4 Nov 2003 01:25:02 -0000 1.3
***************
*** 1076,1080 ****
urls = getImageLinks (link);
fetch (urls[0]);
! append (filter (urls[1]));
setCurrentURL (null);
}
--- 1076,1087 ----
urls = getImageLinks (link);
fetch (urls[0]);
! //append (filter (urls[1]));
! synchronized (mEnqueuers)
! {
! Enqueuer enqueuer = new Enqueuer (urls[1]);
! enqueuer.setPriority (Thread.MIN_PRIORITY);
! mEnqueuers.add (enqueuer);
! enqueuer.start ();
! }
setCurrentURL (null);
}
***************
*** 1092,1095 ****
--- 1099,1122 ----
}
+ static ArrayList mEnqueuers = new ArrayList ();
+
+ class Enqueuer extends Thread
+ {
+ URL[] mList;
+
+ public Enqueuer (URL[] list)
+ {
+ mList = list;
+ }
+
+ public void run ()
+ {
+ append (filter (mList));
+ synchronized (mEnqueuers)
+ {
+ mEnqueuers.remove (this);
+ }
+ }
+ }
//
// ItemListener interface
***************
*** 1427,1430 ****
--- 1454,1466 ----
*
* $Log$
+ * Revision 1.3 2003/11/04 01:25:02 derrickoswald
+ * Made visiting order the same order as on the page.
+ * The 'shouldRecurseSelf' boolean of NodeVisitor could probably
+ * be removed since it doesn't make much sense any more.
+ * Fixed StringBean, which was still looking for end tags with names starting with
+ * a slash, i.e. "/SCRIPT", silly beany.
+ * Added some debugging support to the lexer, you can easily base a breakpoint on
+ * line number.
+ *
* Revision 1.2 2003/10/26 16:44:01 derrickoswald
* Get thumbelina working again. The tag.getName() method doesn't include the / of end tags.
|