<?php

// It may take a while to crawl a site ...
set_time_limit(10000);

// Include the phpcrawl main class
include("libs/PHPCrawler.class.php");

/**
 * Crawler that prints the HTTP response headers of each document it is handed.
 *
 * Extends PHPCrawler and overrides the handleDocumentInfo() method.
 */
class MyCrawler extends PHPCrawler
{
    /**
     * Prints the response headers for the URL of the given document.
     *
     * @param object $DocInfo Document information supplied by the crawler;
     *                        only its `url` property is read here.
     */
    public function handleDocumentInfo($DocInfo)
    {
        // The original read $PageInfo->url, but the parameter is named
        // $DocInfo, so the URL was always undefined. Use the parameter itself.
        // get_headers() issues its own request for the URL and returns the
        // response headers as an array (or false on failure).
        echo "<pre>" . print_r(get_headers($DocInfo->url), true) . "</pre>";

        // Push the output out right away instead of waiting for the crawl to end.
        flush();
    }
}

// Now, create an instance of your class, define the behaviour
// of the crawler (see class reference for more options and details)
// and start the crawling process.
$crawler = new MyCrawler();

// URL to crawl
$crawler->setURL("www.php.net");

// Store and send cookie data like a browser does
$crawler->enableCookieHandling(true);

// Set the traffic limit to 1 MB (in bytes;
// for testing we don't want to "suck" the whole site)
$crawler->setTrafficLimit(1000 * 1024);

// That's enough, now here we go
$crawler->go();

// At the end, after the process is finished, we print a short
// report (see method getProcessReport() for more information)
$report = $crawler->getProcessReport();

// Use a plain newline on the command line and an HTML line break otherwise.
$lb = (PHP_SAPI === "cli") ? "\n" : "<br />";

echo "Summary:" . $lb;
echo "Links followed: " . $report->links_followed . $lb;
echo "Documents received: " . $report->files_received . $lb;
echo "Bytes received: " . $report->bytes_received . " bytes" . $lb;
echo "Process runtime: " . $report->process_runtime . " sec" . $lb;
?>
Last edit: Anonymous 2018-01-04
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
I am using the example project. How can I crawl an entire domain and return the content headers, e.g. text/xml?
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
Mostly copy/pasted from the example given....
Last edit: Anonymous 2018-01-04
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
Does the above code still work? How do I get data from it?
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
View and moderate all "Help" comments posted by this user
Mark all as spam, and block user from posting to "Forum"
Last edit: Anonymous 2018-01-16