|
From: <bi...@us...> - 2010-07-16 20:25:47
|
Revision: 3170
http://archive-access.svn.sourceforge.net/archive-access/?rev=3170&view=rev
Author: binzino
Date: 2010-07-16 20:25:38 +0000 (Fri, 16 Jul 2010)
Log Message:
-----------
Changed logging levels to be less chatty.
Modified Paths:
--------------
tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java
tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java
Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java
===================================================================
--- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java 2010-07-12 02:26:34 UTC (rev 3169)
+++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java 2010-07-16 20:25:38 UTC (rev 3170)
@@ -193,13 +193,14 @@
if ( LOG.isInfoEnabled() ) LOG.info( "Importing ARC: " + arcUrl );
- ArchiveReader r = ArchiveReaderFactory.get( arcUrl );
- r.setDigest( true );
-
- ArcReader reader = new ArcReader( r );
-
+ ArchiveReader r = null;
try
{
+ r = ArchiveReaderFactory.get( arcUrl );
+ r.setDigest( true );
+
+ ArcReader reader = new ArcReader( r );
+
for ( ARCRecord record : reader )
{
// When reading WARC files, records of type other than
@@ -214,7 +215,7 @@
}
catch ( Exception e )
{
- LOG.warn( "Error processing archive file: " + arcUrl, e );
+ LOG.error( "Error processing archive file: " + arcUrl, e );
if ( jobConf.getBoolean( "nutchwax.import.abortOnArchiveReadError", false ) )
{
@@ -223,7 +224,7 @@
}
finally
{
- r.close();
+ if ( r != null ) r.close();
if ( LOG.isInfoEnabled() )
{
@@ -246,11 +247,11 @@
{
ARCRecordMetaData meta = record.getMetaData();
- if ( LOG.isInfoEnabled() ) LOG.info( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength( ) + "]" );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength( ) + "]" );
if ( ! this.httpStatusCodeFilter.isAllowed( record.getStatusCode( ) ) )
{
- if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() );
return false;
}
@@ -291,7 +292,7 @@
if ( url == null )
{
- if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() );
return false;
}
@@ -375,11 +376,11 @@
}
catch ( MalformedURLException mue )
{
- if ( LOG.isInfoEnabled() ) LOG.info( "MalformedURL: " + candidateUrl );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "MalformedURL: " + candidateUrl );
}
catch ( URLFilterException ufe )
{
- if ( LOG.isInfoEnabled() ) LOG.info( "URL filtered: " + candidateUrl );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "URL filtered: " + candidateUrl );
}
return null;
@@ -439,9 +440,9 @@
{
parseResult = this.parseUtil.parse( content );
}
- catch ( Exception e )
+ catch ( Throwable t )
{
- LOG.warn( "Error parsing: " + key, e );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Error parsing: " + key, t );
}
// ?: This is taken from Nutch Fetcher. I believe the signatures are used in the Fetcher
@@ -590,7 +591,7 @@
count += record.read( buf, 0, Math.min( buf.length, record.available( ) ) );
}
- if ( LOG.isInfoEnabled() ) LOG.info( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count );
// Sanity check. The number of bytes read into our bytes[]
// buffer, plus the count of extra stuff read after it should
Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java
===================================================================
--- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java 2010-07-12 02:26:34 UTC (rev 3169)
+++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java 2010-07-16 20:25:38 UTC (rev 3170)
@@ -70,7 +70,7 @@
if ( s.length != 3 )
{
// Don't filter.
- LOG.info( "Allowing : " + urlString );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString );
return urlString;
}
@@ -101,12 +101,12 @@
if ( exclude )
{
- LOG.info( "Excluding: " + urlString );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Excluding: " + urlString );
return null;
}
- LOG.info( "Allowing : " + urlString );
+ if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString );
return urlString;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|