From: <bi...@us...> - 2010-07-16 20:25:47
|
Revision: 3170
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3170&view=rev
Author:   binzino
Date:     2010-07-16 20:25:38 +0000 (Fri, 16 Jul 2010)

Log Message:
-----------
Changed logging levels to be less chatty.

Modified Paths:
--------------
    tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java
    tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java

Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java
===================================================================
--- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java	2010-07-12 02:26:34 UTC (rev 3169)
+++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java	2010-07-16 20:25:38 UTC (rev 3170)
@@ -193,13 +193,14 @@
 
     if ( LOG.isInfoEnabled() ) LOG.info( "Importing ARC: " + arcUrl );
 
-    ArchiveReader r = ArchiveReaderFactory.get( arcUrl );
-    r.setDigest( true );
-
-    ArcReader reader = new ArcReader( r );
-
+    ArchiveReader r = null;
     try
       {
+        r = ArchiveReaderFactory.get( arcUrl );
+        r.setDigest( true );
+
+        ArcReader reader = new ArcReader( r );
+
         for ( ARCRecord record : reader )
           {
             // When reading WARC files, records of type other than
@@ -214,7 +215,7 @@
       }
     catch ( Exception e )
       {
-        LOG.warn( "Error processing archive file: " + arcUrl, e );
+        LOG.error( "Error processing archive file: " + arcUrl, e );
 
         if ( jobConf.getBoolean( "nutchwax.import.abortOnArchiveReadError", false ) )
           {
@@ -223,7 +224,7 @@
       }
     finally
       {
-        r.close();
+        if ( r != null ) r.close();
 
         if ( LOG.isInfoEnabled() )
           {
@@ -246,11 +247,11 @@
   {
     ARCRecordMetaData meta = record.getMetaData();
 
-    if ( LOG.isInfoEnabled() ) LOG.info( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength( ) + "]" );
+    if ( LOG.isDebugEnabled() ) LOG.debug( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength( ) + "]" );
 
     if ( ! this.httpStatusCodeFilter.isAllowed( record.getStatusCode( ) ) )
       {
-        if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() );
         return false;
       }
 
@@ -291,7 +292,7 @@
 
     if ( url == null )
       {
-        if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() );
         return false;
       }
 
@@ -375,11 +376,11 @@
       }
     catch ( MalformedURLException mue )
       {
-        if ( LOG.isInfoEnabled() ) LOG.info( "MalformedURL: " + candidateUrl );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "MalformedURL: " + candidateUrl );
       }
     catch ( URLFilterException ufe )
       {
-        if ( LOG.isInfoEnabled() ) LOG.info( "URL filtered: " + candidateUrl );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "URL filtered: " + candidateUrl );
       }
 
     return null;
@@ -439,9 +440,9 @@
       {
         parseResult = this.parseUtil.parse( content );
       }
-    catch ( Exception e )
+    catch ( Throwable t )
       {
-        LOG.warn( "Error parsing: " + key, e );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "Error parsing: " + key, t );
       }
 
     // ?: This is taken from Nutch Fetcher.  I believe the signatures are used in the Fetcher
@@ -590,7 +591,7 @@
         count += record.read( buf, 0, Math.min( buf.length, record.available( ) ) );
       }
 
-    if ( LOG.isInfoEnabled() ) LOG.info( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count );
+    if ( LOG.isDebugEnabled() ) LOG.debug( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count );
 
     // Sanity check.  The number of bytes read into our bytes[]
     // buffer, plus the count of extra stuff read after it should

Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java
===================================================================
--- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java	2010-07-12 02:26:34 UTC (rev 3169)
+++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java	2010-07-16 20:25:38 UTC (rev 3170)
@@ -70,7 +70,7 @@
     if ( s.length != 3 )
       {
         // Don't filter.
-        LOG.info( "Allowing : " + urlString );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString );
 
         return urlString;
       }
@@ -101,12 +101,12 @@
 
     if ( exclude )
       {
-        LOG.info( "Excluding: " + urlString );
+        if ( LOG.isDebugEnabled() ) LOG.debug( "Excluding: " + urlString );
 
         return null;
       }
 
-    LOG.info( "Allowing : " + urlString );
+    if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString );
 
     return urlString;
   }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|