#include<iostream>#include<sstream>#include<curl/curl.h>#include<htmlcxx/html/ParserDom.h>#include<iostream>usingnamespacestd;usingnamespacehtmlcxx;staticsize_thttp_write(void*buf,size_tsize,size_tnmemb,void*userp){if(userp){ostringstream*oss=static_cast<ostringstream*>(userp);streamsizelen=size*nmemb;oss->write(static_cast<char*>(buf),len);returnnmemb;}return0;}stringget_html_page(conststring&url,longtimeout=0){CURL*curl=curl_easy_init();ostringstreamoss;curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,&http_write);curl_easy_setopt(curl,CURLOPT_NOPROGRESS,1L);curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1L);curl_easy_setopt(curl,CURLOPT_FILE,&oss);curl_easy_setopt(curl,CURLOPT_TIMEOUT,timeout);curl_easy_setopt(curl,CURLOPT_URL,url.c_str());curl_easy_perform(curl);curl_easy_cleanup(curl);returnoss.str();}intmain(){stringhtml=get_html_page("http://www.google.co.in");//cout << html << endl;HTML::ParserDomparser;tree<HTML::Node>dom=parser.parseTree(html);//Print whole DOM tree//cout <<dom <<endl;//Dump all links in the treetree<HTML::Node>::iteratorit=dom.begin();tree<HTML::Node>::iteratorend=dom.end();for(;it!=end;++it){if(strcasecmp(it->tagName().c_str(),"A")==0){it->parseAttributes();//cout << it->attribute("href").second << endl;}}//Dump all text of the documentit=dom.begin();end=dom.end();for(;it!=end;++it){if((!it->isTag())&&(!it->isComment())){cout<<it->text();}}// cout << endl;return0;}
include <string>
I am getting this as output.
Please tell me why I am getting the JavaScript code as well…
To skip JavaScript and style information, skip the contents of the "script" and "style" HTML tags.