From: Michael S. <sta...@us...> - 2005-10-06 02:31:18
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17728/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer

Modified Files:
	IaIndexingFilter.java
Log Message:
* src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java
Add 'encoding' to the result.

Index: IaIndexingFilter.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** IaIndexingFilter.java 20 Aug 2005 00:09:37 -0000 1.19
--- IaIndexingFilter.java 6 Oct 2005 02:31:10 -0000 1.20
***************
*** 60,63 ****
--- 60,69 ----
      public static final String EXACTURL_KEY = "exacturl";
+
+     /**
+      * Set into metadata by the nutch html parser.
+      */
+     private static final String ENCODING_KEY = "CharEncodingForConversion";
+
      private MessageDigest md = null;
***************
*** 106,109 ****
--- 112,118 ----
              LOGGER.info("No metadata for " + doc.toString());
          } else {
+             // Add as stored, unindexed, and untokenized.
+             add(url, doc, "encoding", p.getProperty(ENCODING_KEY),
+                 false, true, true, false);
              // Add as stored, indexed, and untokenized.
              add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY),
***************
*** 139,146 ****
              if (index > 0) {
                  add(url, doc, "primarytype", mimetype.substring(0, index),
!                     true, true, false, false);
                  if (index + 1 < mimetype.length()) {
                      add(url, doc, "subtype", mimetype.substring(index + 1),
!                         true, true, false, false);
                  }
              }
--- 148,155 ----
              if (index > 0) {
                  add(url, doc, "primarytype", mimetype.substring(0, index),
!                     true, true, true, false);
                  if (index + 1 < mimetype.length()) {
                      add(url, doc, "subtype", mimetype.substring(index + 1),
!                         true, true, true, false);
                  }
              }
|