Thread: [Htmlparser-user] Change Attributes of TDs and TRs
Brought to you by:
derrickoswald
From: Fuhrmann, M. <mic...@sa...> - 2006-01-11 13:26:16
|
Hi All! I want to change several attributes of the td and tr tags of certain tables but I don't know if do it the right way. The problem is that I find the right table (only tables with ids) but I don't reach the td or tr tags.... My code looks like that: public void cleanDokument(HttpServletRequest request,HttpServletResponse response) throws IOException { // Get the calling HTML Document define the Writer and open the connection URLConnection connection; URL request_url = new URL(request.getHeader("referer").toString()); PrintWriter out = response.getWriter(); connection = (HttpURLConnection)request_url.openConnection (); try { Parser parser = new Parser (); parser.setConnection(connection); NodeFilter all_tables = new TagNameFilter("table"); NodeList list = parser.parse (all_tables); Node[] nodelist; // Seperate all table tags for (NodeIterator e = parser.elements (); e.hasMoreNodes ();) e.nextNode ().collectInto (list,all_tables); nodelist=list.toNodeArray(); for (int h=0; h<nodelist.length;h++) { if (nodelist[h] instanceof TableTag) { //for schleife für die td's und tr's if(((TableTag)nodelist[h]).getAttribute("id")!= null) { for (int i=0; i<nodelist.length; i++) { out.println(nodelist.toString()); if(nodelist[i] instanceof TableRow) { out.println("Row found!"); ((TableRow)nodelist[i]).removeAttribute ("nowrap"); } else if (nodelist[i] instanceof TableColumn) { out.println("Column found!"); ((TableColumn)nodelist[i]).removeAttribute ("nowrap"); } } out.println(nodelist[h].toHtml()); } } else if(nodelist[h] instanceof TableRow || nodelist[h] instanceof TableColumn) { out.println("Else erreicht!"); out.println(((TableRow)nodelist[h]).getText()); } } //makePdf(out,response); } catch(Exception e) { out.println("Fehler beim Parsen!"); e.printStackTrace(out); } } Does my nodelist contain the tr and td tags? Is it right to say instanceof TableRow???? Many thanks and best regards Michael |
From: Third E. <nav...@gm...> - 2006-01-11 13:31:52
|
Table tag object already has a fucntion to get the rows and TableRow has function to get columns. You don't need to iterate yourself. On 1/11/06, Fuhrmann, Michael <mic...@sa...> wrote: > > > Hi All! > > I want to change several attributes of the td and tr tags of certain tables > but I don't know if do it the right way. > The problem is that I find the right table (only tables with ids) but I > don't reach the td or tr tags…. > My code looks like that: > > public void cleanDokument(HttpServletRequest > request,HttpServletResponse response) throws IOException > { > // Get the calling HTML Document define the Writer and open > the connection > URLConnection connection; > URL request_url = new > URL(request.getHeader("referer").toString()); > > PrintWriter out = response.getWriter(); > connection = > (HttpURLConnection)request_url.openConnection (); > > try > { > Parser parser = new Parser (); > parser.setConnection(connection); > > NodeFilter all_tables = new TagNameFilter("table"); > NodeList list = parser.parse (all_tables); > Node[] nodelist; > > // Seperate all table tags > for (NodeIterator e = parser.elements (); e.hasMoreNodes > ();) > e.nextNode ().collectInto (list,all_tables); > > nodelist=list.toNodeArray(); > > for (int h=0; h<nodelist.length;h++) > { > if (nodelist[h] instanceof TableTag) > { > //for schleife für die td's und tr's > > if(((TableTag)nodelist[h]).getAttribute("id")!= null) > { > for (int i=0; i<nodelist.length; > i++) > { > > out.println(nodelist.toString()); > if(nodelist[i] instanceof > TableRow) > { > out.println("Row > found!"); > > ((TableRow)nodelist[i]).removeAttribute ("nowrap"); > } > else if (nodelist[i] > instanceof TableColumn) > { > out.println("Column > found!"); > > ((TableColumn)nodelist[i]).removeAttribute ("nowrap"); > } > } > out.println(nodelist[h].toHtml()); > } > } > else if(nodelist[h] instanceof TableRow || > nodelist[h] instanceof TableColumn) > { > out.println("Else erreicht!"); > > out.println(((TableRow)nodelist[h]).getText()); > } > } > //makePdf(out,response); > } > catch(Exception e) > { > out.println("Fehler beim Parsen!"); > e.printStackTrace(out); > } > } > > Does my nodelist contain the tr and td tags? Is it right to say instanceof > TableRow???? > > Many thanks and best regards > Michael -- Naveen K Kohli http://www.netomatix.com |
From: Derrick O. <Der...@Ro...> - 2006-01-12 00:24:37
|
By the way, after this call: NodeList list = parser.parse (all_tables); the parser will be at the end of the page and return no more nodes. So, this: // Seperate all table tags for (NodeIterator e = parser.elements (); e.hasMoreNodes ();) e.nextNode ().collectInto (list,all_tables); doesn't do anything. You can use: parser.reset (); to start again, if that is what you really want to do, but in your case you would get duplicates of everything. Third Eye wrote: >Table tag object already has a fucntion to get the rows and TableRow >has function to get columns. You don't need to iterate yourself. > >On 1/11/06, Fuhrmann, Michael <mic...@sa...> wrote: > > >>Hi All! >> >>I want to change several attributes of the td and tr tags of certain tables >>but I don't know if do it the right way. >>The problem is that I find the right table (only tables with ids) but I >>don't reach the td or tr tags…. >>My code looks like that: >> >>public void cleanDokument(HttpServletRequest >>request,HttpServletResponse response) throws IOException >> { >> // Get the calling HTML Document define the Writer and open >>the connection >> URLConnection connection; >> URL request_url = new >>URL(request.getHeader("referer").toString()); >> >> PrintWriter out = response.getWriter(); >> connection = >>(HttpURLConnection)request_url.openConnection (); >> >> try >> { >> Parser parser = new Parser (); >> parser.setConnection(connection); >> >> NodeFilter all_tables = new TagNameFilter("table"); >> NodeList list = parser.parse (all_tables); >> Node[] nodelist; >> >> // Seperate all table tags >> for (NodeIterator e = parser.elements (); e.hasMoreNodes >>();) >> e.nextNode ().collectInto (list,all_tables); >> >> nodelist=list.toNodeArray(); >> >> for (int h=0; h<nodelist.length;h++) >> { >> if (nodelist[h] instanceof TableTag) >> { >> //for schleife für die td's und tr's >> >>if(((TableTag)nodelist[h]).getAttribute("id")!= null) >> { >> for (int i=0; i<nodelist.length; >>i++) >> { >> 
>>out.println(nodelist.toString()); >> if(nodelist[i] instanceof >>TableRow) >> { >> out.println("Row >>found!"); >> >>((TableRow)nodelist[i]).removeAttribute ("nowrap"); >> } >> else if (nodelist[i] >>instanceof TableColumn) >> { >> out.println("Column >>found!"); >> >>((TableColumn)nodelist[i]).removeAttribute ("nowrap"); >> } >> } >> out.println(nodelist[h].toHtml()); >> } >> } >> else if(nodelist[h] instanceof TableRow || >>nodelist[h] instanceof TableColumn) >> { >> out.println("Else erreicht!"); >> >>out.println(((TableRow)nodelist[h]).getText()); >> } >> } >> //makePdf(out,response); >> } >> catch(Exception e) >> { >> out.println("Fehler beim Parsen!"); >> e.printStackTrace(out); >> } >> } >> >>Does my nodelist contain the tr and td tags? Is it right to say instanceof >>TableRow???? >> >>Many thanks and best regards >>Michael >> >> > > >-- >Naveen K Kohli >http://www.netomatix.com > |