Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17728/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
Modified Files:
IaIndexingFilter.java
Log Message:
* src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java
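Add an 'encoding' field to the document (taken from the 'CharEncodingForConversion' metadata property set by the nutch html parser). Also change the third boolean flag passed to add() for 'primarytype' and 'subtype' from false to true.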
Index: IaIndexingFilter.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** IaIndexingFilter.java 20 Aug 2005 00:09:37 -0000 1.19
--- IaIndexingFilter.java 6 Oct 2005 02:31:10 -0000 1.20
***************
*** 60,63 ****
--- 60,69 ----
public static final String EXACTURL_KEY = "exacturl";
+
+ /**
+ * Set into metadata by the nutch html parser.
+ */
+ private static final String ENCODING_KEY = "CharEncodingForConversion";
+
private MessageDigest md = null;
***************
*** 106,109 ****
--- 112,118 ----
LOGGER.info("No metadata for " + doc.toString());
} else {
+ // Add as stored, unindexed, and untokenized.
+ add(url, doc, "encoding", p.getProperty(ENCODING_KEY),
+ false, true, true, false);
// Add as stored, indexed, and untokenized.
add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY),
***************
*** 139,146 ****
if (index > 0) {
add(url, doc, "primarytype", mimetype.substring(0, index),
! true, true, false, false);
if (index + 1 < mimetype.length()) {
add(url, doc, "subtype", mimetype.substring(index + 1),
! true, true, false, false);
}
}
--- 148,155 ----
if (index > 0) {
add(url, doc, "primarytype", mimetype.substring(0, index),
! true, true, true, false);
if (index + 1 < mimetype.length()) {
add(url, doc, "subtype", mimetype.substring(index + 1),
! true, true, true, false);
}
}
|