Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20474/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
Modified Files:
IaIndexingFilter.java
Log Message:
Fix up wera references. Point to archive-access.
Index: IaIndexingFilter.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** IaIndexingFilter.java 6 Oct 2005 02:31:10 -0000 1.20
--- IaIndexingFilter.java 6 Oct 2005 21:23:17 -0000 1.21
***************
*** 144,158 ****
}
if (mimetype != null) {
! // wera wants the sub and primary types.
int index = mimetype.indexOf('/');
if (index > 0) {
! add(url, doc, "primarytype", mimetype.substring(0, index),
! true, true, true, false);
if (index + 1 < mimetype.length()) {
! add(url, doc, "subtype", mimetype.substring(index + 1),
! true, true, true, false);
}
}
- add(url, doc, "type", mimetype, true, false, true, false);
}
// Add as not lowercased, not stored, indexed, and not tokenized.
--- 144,163 ----
}
if (mimetype != null) {
! // wera wants the sub and primary types in the index, so they are
! // stored but not searchable. nutch adds the primary type, the subtype,
! // and the complete type all to one 'type' field.
! final String type = "type";
! add(url, doc, type, mimetype, true, false, true, false);
int index = mimetype.indexOf('/');
if (index > 0) {
! String tmp = mimetype.substring(0, index);
! add(url, doc, "primaryType", tmp, true, true, false, false);
! add(url, doc, type, tmp, true, false, true, false);
if (index + 1 < mimetype.length()) {
! tmp = mimetype.substring(index + 1);
! add(url, doc, "subType", tmp, true, true, false, false);
! add(url, doc, type, tmp, true, false, true, false);
}
}
}
// Add as not lowercased, not stored, indexed, and not tokenized.
|