Hi,
I have a problem POSTing data to some URLs: when I run $crawler->go(), the report object looks like this: [links_followed] => 1 [files_received] => 0 [bytes_received] => 0
...
The site is built with ASP and makes some of its requests via AJAX; with every request, form data such as __VIEWSTATE, __STATEVALIDATION, etc. must be sent to retrieve the next page.
The code is:
/**
 * Applies the baseline crawler configuration.
 *
 * Sets the start URL from $this->url, the stream and connection timeouts,
 * enables cookie handling, registers a content-type receive rule for
 * "text/html", registers URL filter rules for image, stylesheet and script
 * extensions, and finally calls $this->__setProxyAndAgent().
 */
protected function _init()
{
    $this->crawler->setURL($this->url);
    $this->crawler->setStreamTimeout(15); // defaults to 2 seconds
    $this->crawler->setConnectionTimeout(20); // defaults to 5 seconds
    $this->crawler->enableCookieHandling(true);

    // This rule lets the crawler receive the content/source of pages with the Content-Type "text/html".
    // Other pages or files with different content-types (e.g. "image/gif") won't be received.
    $this->crawler->addContentTypeReceiveRule("#text/html#");

    $this->crawler->addURLFilterRule("#\.(jpg|jpeg|gif|png)$# i");
    $this->crawler->addURLFilterRule("#\.(css|js)$# i");

    $this->__setProxyAndAgent();
}
// Phase 1: request the page once and keep its source so the hidden form
// fields can be extracted from it.
$this->url = 'http://fa.journals.sid.ir/AdvanceJournal.aspx';
setConfig('useAgents', true);

$crawlingTryAgainCount = getConfig('crawlingTryAgainCount'); // for example is 3
do {
    $this->_init();
    // Follow rule that presumably matches no URL, so only the entry page is requested.
    $this->crawler->addURLFollowRule("#!NOTHING!#");
    $this->crawler->start();
    $report = $this->_logCrawlerSummaryReport();
} while ($report->links_followed != $report->files_received && $crawlingTryAgainCount-- != 0);

// Explicit "#" delimiters: without them PCRE takes the leading "<" and the
// trailing ">" of the pattern as the delimiter pair, so the tag brackets were
// never part of the match. [^>] and [^"] keep each match inside a single tag
// and a single attribute value.
$regex = [
    'hiddenInputs' => '#<input[^>]*?type="hidden"[^>]*?name="([^"]*)"[^>]*?value="([^"]*)"[^>]*>#i',
];
preg_match_all($regex['hiddenInputs'], $this->crawler->plainContent, $hiddenInputsMatch);

// Field name => field value, extended with the fields of the async postback.
$postData = array_combine($hiddenInputsMatch[1], $hiddenInputsMatch[2]);
$postData = array_merge($postData, [
    'ctl00$ContentPlaceHolder1$ScriptManager1' => 'ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$Timer1',
    '__ASYNCPOST' => 'true',
]);

// Phase 2: POST the collected form data back to the same URL.
$this->crawler->plainContent = '';

// NOTE(review): _init() only registers a receive rule for "text/html". ASP.NET
// answers async (UpdatePanel) postbacks with Content-Type "text/plain", so the
// response body would be skipped, which matches files_received => 0 and
// bytes_received => 0. Confirm against the actual response headers.
$this->crawler->addContentTypeReceiveRule("#text/plain#");

$crawlingTryAgainCount = getConfig('crawlingTryAgainCount');
do {
    // The URL is used as a regular expression here, so its special
    // characters (".", "/", ...) must be escaped to match literally.
    $this->crawler->addPostData('#' . preg_quote($this->url, '#') . '#', $postData);
    $this->crawler->setFollowMode(1);
    $this->crawler->setFollowRedirects(TRUE);
    $this->crawler->setTrafficLimit(1000 * 1024);
    $this->crawler->start();
    $report = $this->_logCrawlerSummaryReport();
    debug($report);
} while ($report->links_followed != $report->files_received && $crawlingTryAgainCount-- != 0);

debug($this->crawler->plainContent, 1);
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
Hi,
I have a problem POSTing data to some URLs: when I run $crawler->go(), the report object looks like this:
[links_followed] => 1
[files_received] => 0
[bytes_received] => 0
...
The site is built with ASP and makes some of its requests via AJAX; with every request, form data such as __VIEWSTATE, __STATEVALIDATION, etc. must be sent to retrieve the next page.
The code is:
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
$this->crawler->plainContent is a variable that stores $pageInfo->source.
In this code I use a single crawler object for both steps.
However, at the last line of the code (debug($this->crawler->plainContent, 1);) the plain content is empty.
Can anyone please help me?
Thank you.
Last edit: Anonymous 2015-06-06
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
help