Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
Modified Files:
IaIndexingFilter.java
Log Message:
* project.properties
* src/articles/releasenotes.xml
* xdocs/srcbuild.xml
Revert to 0.7.0 nutch. 0.7.1 has problems.
* src/java/org/archive/access/nutch/Arc2Segment.java
If we fail to parse, don't add to index (Should get rid of those
'no arcoffset', etc., messages we used to get when indexing).
* src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java
Don't warn if 'encoding' not present -- won't be present for many types.
Index: IaIndexingFilter.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** IaIndexingFilter.java 6 Oct 2005 21:23:17 -0000 1.21
--- IaIndexingFilter.java 18 Oct 2005 23:21:11 -0000 1.22
***************
*** 112,118 ****
LOGGER.info("No metadata for " + doc.toString());
} else {
! // Add as stored, unindexed, and untokenized.
add(url, doc, "encoding", p.getProperty(ENCODING_KEY),
! false, true, true, false);
// Add as stored, indexed, and untokenized.
add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY),
--- 112,119 ----
LOGGER.info("No metadata for " + doc.toString());
} else {
! // Add as stored, unindexed, and untokenized. Don't warn if absent.
! // Its not a tradegy.
add(url, doc, "encoding", p.getProperty(ENCODING_KEY),
! false, true, true, false, false);
// Add as stored, indexed, and untokenized.
add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY),
***************
*** 177,182 ****
boolean lowerCase, boolean store, boolean index,
boolean tokenize) {
if (fieldValue == null || fieldValue.length() <= 0) {
! LOGGER.warning("No " + fieldName + " for url " + url);
return;
}
--- 178,193 ----
boolean lowerCase, boolean store, boolean index,
boolean tokenize) {
+ add(url, doc, fieldName, fieldValue, lowerCase, store, index, tokenize,
+ true);
+ }
+
+ private void add(final String url, final Document doc,
+ final String fieldName, final String fieldValue,
+ boolean lowerCase, boolean store, boolean index,
+ boolean tokenize, final boolean warn) {
if (fieldValue == null || fieldValue.length() <= 0) {
! if (warn) {
! LOGGER.warning("No " + fieldName + " for url " + url);
! }
return;
}
|