Revision: 3327 http://archive-access.svn.sourceforge.net/archive-access/?rev=3327&view=rev Author: binzino Date: 2010-11-09 00:59:07 +0000 (Tue, 09 Nov 2010) Log Message: ----------- Added check for null hostname. Warning log message is issued. Modified Paths: -------------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/index-nutchwax/src/java/org/archive/nutchwax/index/ConfigurableIndexingFilter.java Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/index-nutchwax/src/java/org/archive/nutchwax/index/ConfigurableIndexingFilter.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/index-nutchwax/src/java/org/archive/nutchwax/index/ConfigurableIndexingFilter.java 2010-11-03 07:13:24 UTC (rev 3326) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/index-nutchwax/src/java/org/archive/nutchwax/index/ConfigurableIndexingFilter.java 2010-11-09 00:59:07 UTC (rev 3327) @@ -149,8 +149,6 @@ { Metadata meta = parse.getData().getContentMeta(); - // - for ( FieldSpecification spec : this.fieldSpecs ) { String value = null; @@ -169,6 +167,13 @@ value = uri.getHost( ); + if ( value == null ) + { + LOG.warn( "URL has no hostname: \"" + uri + "\""); + + return null; + } + // Strip off any "www." header. if ( value.startsWith( "www." ) ) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |