|
From: <go...@us...> - 2003-09-30 18:07:58
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel
In directory sc8-pr-cvs1:/tmp/cvs-serv21645/src/org/archive/crawler/datamodel
Modified Files:
CrawlServer.java
Log Message:
stream/http recording
Index: CrawlServer.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel/CrawlServer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** CrawlServer.java 6 Aug 2003 01:18:43 -0000 1.1
--- CrawlServer.java 30 Sep 2003 18:07:53 -0000 1.2
***************
*** 13,16 ****
--- 13,17 ----
import org.apache.commons.httpclient.methods.GetMethod;
+ import org.archive.crawler.io.ReplayInputStream;
/**
***************
*** 91,97 ****
// note that akamai will return 400 for some "not founds"
try {
BufferedReader reader = new BufferedReader(
! new InputStreamReader(
! get.getResponseBodyAsStream()));
robots = RobotsExclusionPolicy.policyFor(reader);
} catch (IOException e) {
--- 92,99 ----
// note that akamai will return 400 for some "not founds"
try {
+ ReplayInputStream contentBodyStream = get.getHttpRecorder().getRecordedInput().getReplayInputStream();
+ contentBodyStream.setToResponseBodyStart();
BufferedReader reader = new BufferedReader(
! new InputStreamReader(contentBodyStream));
robots = RobotsExclusionPolicy.policyFor(reader);
} catch (IOException e) {
|