|
From: <go...@us...> - 2003-09-30 18:07:58
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic
In directory sc8-pr-cvs1:/tmp/cvs-serv21645/src/org/archive/crawler/basic
Modified Files:
FetcherHTTPSimple.java ARCWriter.java
Log Message:
stream/http recording
Index: FetcherHTTPSimple.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic/FetcherHTTPSimple.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** FetcherHTTPSimple.java 25 Sep 2003 00:14:02 -0000 1.9
--- FetcherHTTPSimple.java 30 Sep 2003 18:07:52 -0000 1.10
***************
*** 106,109 ****
--- 106,110 ----
InputStream is = get.getResponseBodyAsStream();
while(is.read()!=-1) {} // TODOSOON: read in bigger chunks!
+ get.getHttpRecorder().close();
Header contentLength = get.getResponseHeader("Content-Length");
Index: ARCWriter.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic/ARCWriter.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** ARCWriter.java 6 Aug 2003 01:19:29 -0000 1.31
--- ARCWriter.java 30 Sep 2003 18:07:52 -0000 1.32
***************
*** 13,17 ****
import java.io.OutputStream;
! import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.methods.GetMethod;
import org.archive.crawler.basic.StatisticsTracker;
--- 13,17 ----
import java.io.OutputStream;
! // import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.methods.GetMethod;
import org.archive.crawler.basic.StatisticsTracker;
***************
*** 281,308 ****
}
- int headersSize = 0;
int recordLength = 0;
- Header[] headers = get.getResponseHeaders();
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- baos.write(get.getStatusLine().toString().getBytes()); // get status line (it's not a header)
- baos.write("\n".getBytes());
- for(int i=0; i < headers.length; i++){
- baos.write(headers[i].toExternalForm().getBytes());
- }
- recordLength += baos.size();
! // get body so we can calc length for metaline
! byte[] body = get.getResponseBody();
! recordLength += body.length;
! // don't forget the extra CRLF between headers and body
! recordLength += 2;
writeMetaLine(curi, recordLength);
! baos.writeTo(out);
! out.write("\r\n".getBytes());
! out.write(body);
! out.write("\n".getBytes());
}
--- 281,314 ----
}
int recordLength = 0;
! // OLD WAY
! // Header[] headers = get.getResponseHeaders();
! //
! // ByteArrayOutputStream baos = new ByteArrayOutputStream();
! // baos.write(get.getStatusLine().toString().getBytes()); // get status line (it's not a header)
! // baos.write("\n".getBytes());
! // for(int i=0; i < headers.length; i++){
! // baos.write(headers[i].toExternalForm().getBytes());
! // }
! // recordLength += baos.size();
! //
! // // get body so we can calc length for metaline
! // byte[] body = get.getResponseBody();
! // // don't forget the extra CRLF between headers and body
! // recordLength += 2;
+ recordLength += get.getHttpRecorder().getRecordedInput().getSize();
+
writeMetaLine(curi, recordLength);
! get.getHttpRecorder().getRecordedInput().getReplayInputStream().readFullyTo(out);
! out.write('\n'); // trailing newline
!
! // OLD WAY
! // baos.writeTo(out);
! // out.write("\r\n".getBytes());
! // out.write(body);
! // out.write("\n".getBytes());
}
|