// Check for the node or tag that you want
// if(node instanceof ...) {
//Downcast, and process
//recursively (nodes within nodes)
//}
} //3
for(int i=0;i<resultlist.size();i++){ //7
//
System.out.println(resultlist.elementAt(i).toHtml());<--------didn't output formal html file =.=
}//7
Regards:
I used the suggested design.
It's me again.
The lines marked with an arrow ("<-----------------") in the code are my questions.
Did I miss something important?
Any suggestions are welcome.
Thank you.
May goodness be with you all.
--------------------------------------------------
code ParserNodeIterator.java
--------------------------------------------------
import org.htmlparser.Parser;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.*;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
import org.htmlparser.tags.*;
import org.htmlparser.visitors.NodeVisitor;
import org.htmlparser.*;
import org.htmlparser.filters.*;
import org.htmlparser.filters.*;
import java.io.*;
import java.util.Vector;
import java.util.*;
import org.htmlparser.Attribute;
import org.htmlparser.nodes.*;
import org.htmlparser.util.*;
public class ParserNodeIterator{//1
public static void main (String[] args) throws ParserException
{//2
MyVisitor visitor = new MyVisitor();
NodeList resultlist = new NodeList();
Parser parser = new Parser("http://www.yzu.edu.tw");
NodeFilter filter=new NotFilter(new TagNameFilter("Script"));
parser.parse(filter); <-----------------didn't filter out Script tag
for(NodeIterator i = parser.elements();i.hasMoreNodes();) { //3
Node node =(Node)i.nextNode();
if(node instanceof TextNode) { //6
//Downcasting to StringNode
TextNode textNode = (TextNode)node;
// Do whatever processing you want with the string node
// System.out.println(textNode.toHtml());
}//6
/*
if(node instanceof TagNode) {
TagNode tagNode = (TagNode)node;
// Do whatever processing you want with the string node
//
System.out.println(stringNode.toHtml());
}
*/
if(node instanceof TagNode){ //5
tagNode.accept(visitor);
}//5
if(node instanceof RemarkNode) { //4
RemarkNode remarkNode = (RemarkNode)node;
}//4
resultlist.add(iterator.nextNode());
// Check for the node or tag that you want
// if(node instanceof ...) {
//Downcast, and process
//recursively (nodes within nodes)
//}
} //3
for(int i=0;i<resultlist.size();i++){ //7
//
System.out.println(resultlist.elementAt(i).toHtml());<--------didn't output formal html file =.=
}//7
}//2
}//1
--------------------------------------------------
code MyVisitor.java
--------------------------------------------------
import org.htmlparser.Parser;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.*;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
import org.htmlparser.tags.*;
import org.htmlparser.visitors.NodeVisitor;
import org.htmlparser.*;
import org.htmlparser.Attribute;
import org.htmlparser.filters.*;
import org.htmlparser.filters.*;
import java.io.*;
import java.util.Vector;
import java.util.*;
import org.htmlparser.sax.Attributes;
import org.xml.sax.*;
/**
 * Visitor that rewrites each start tag it is handed:
 * <ul>
 *   <li>the tag name is lower-cased;</li>
 *   <li>relative {@code href} (links) and {@code src} (images) values are
 *       made absolute against {@link #BASE_URL};</li>
 *   <li>images get {@code alt="*"}, the {@code html} tag gets XHTML
 *       namespace/language attributes, style tags get
 *       {@code type="text/css"};</li>
 *   <li>every attribute that has a value is wrapped in quotes.</li>
 * </ul>
 */
public class MyVisitor extends NodeVisitor
{
    /** Site root that relative URLs are resolved against (no trailing slash). */
    private static final String BASE_URL = "http://www.yzu.edu.tw";

    /** Preferred quote character for attribute values. */
    private static final char QUOTE = '"';

    public MyVisitor()
    {
    }

    /**
     * Applies all rewrites to one start tag. Each rewrite is performed exactly
     * once per tag, so visiting never stacks URL prefixes.
     *
     * @param tag the tag being visited; modified in place
     */
    public void visitTag(Tag tag)
    {
        lowerCaseTagName(tag);

        if (tag instanceof LinkTag) {
            makeAbsolute(tag, "href");
        } else if (tag instanceof ImageTag) {
            tag.setAttribute("alt", "*");
            makeAbsolute(tag, "src");
        } else if (tag instanceof Html) {
            tag.setAttribute("xmlns", "http://www.w3.org/1999/xhtml");
            tag.setAttribute("xml:lang", "en-US");
            tag.setAttribute("lang", "en-US");
        } else if (tag instanceof StyleTag) {
            tag.setAttribute("type", "text/css", QUOTE);
        }

        // Quote last so attributes added above are quoted as well.
        quoteAttributeValues(tag);
    }

    /**
     * Lower-cases the name of the first entry of the tag's attribute list,
     * which HTML Parser uses to hold the tag name. Does nothing when the list
     * is missing or empty, or the first entry has no name.
     */
    private static void lowerCaseTagName(Tag tag)
    {
        Vector attributes = tag.getAttributesEx();
        if (attributes == null || attributes.isEmpty()) {
            return;
        }
        Attribute first = (Attribute) attributes.elementAt(0);
        String name = first.getName();
        if (name != null) {
            // Fixed locale: default-locale lower-casing can mangle ASCII
            // names (e.g. the Turkish dotless i).
            first.setName(name.toLowerCase(Locale.ENGLISH));
        }
    }

    /**
     * Rewrites a relative URL attribute to an absolute one under
     * {@link #BASE_URL}. Missing or empty values, fragment-only references,
     * protocol-relative URLs and URLs that already carry a scheme are left
     * untouched. Every relative URL is treated as relative to the site root.
     */
    private static void makeAbsolute(Tag tag, String attributeName)
    {
        String value = tag.getAttribute(attributeName);
        if (value == null || value.length() == 0) {
            return;
        }
        if (value.startsWith("#") || value.startsWith("//") || hasScheme(value)) {
            return;
        }
        // Join with exactly one slash, whichever form the page used.
        String path = value.startsWith("/") ? value : "/" + value;
        tag.setAttribute(attributeName, BASE_URL + path);
    }

    /**
     * Tells whether the text starts with a URL scheme such as {@code http:}
     * or {@code mailto:} (an ASCII letter followed by letters, digits,
     * {@code +}, {@code -} or {@code .}, then a colon).
     */
    private static boolean hasScheme(String url)
    {
        int colon = url.indexOf(':');
        if (colon <= 0 || !isAsciiLetter(url.charAt(0))) {
            return false;
        }
        for (int i = 1; i < colon; i++) {
            char c = url.charAt(i);
            boolean allowed = isAsciiLetter(c)
                    || (c >= '0' && c <= '9')
                    || c == '+' || c == '-' || c == '.';
            if (!allowed) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether the character is in {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Wraps the value of every named attribute that has a value in quotes.
     * Entries without a name (whitespace separators) or without a value (the
     * tag name, stand-alone attributes) are skipped so no stray quotes are
     * emitted. A value that itself contains a double quote is wrapped in
     * single quotes instead.
     */
    private static void quoteAttributeValues(Tag tag)
    {
        Vector attributes = tag.getAttributesEx();
        if (attributes == null) {
            return;
        }
        for (int i = 0; i < attributes.size(); i++) {
            Attribute attribute = (Attribute) attributes.elementAt(i);
            String value = attribute.getValue();
            if (attribute.getName() == null || value == null) {
                continue;
            }
            attribute.setQuote(value.indexOf(QUOTE) < 0 ? QUOTE : '\'');
        }
    }
}