Update of /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer
In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv28780/src/org/dlese/dpc/index/writer
Modified Files:
DleseCollectionFileIndexingWriter.java
ItemFileIndexingWriter.java XMLFileIndexingWriter.java
Log Message:
-Implemented a relations framework for the DDSWS search service. The annotation
'isAnnotatedBy' relation is currently supported for all frameworks. Paves the way
for other arbitrary relationship types to be defined in the future.
-Annotations are now supported for all record types, not just ADN, as part of the
new relations framework (see above).
Index: XMLFileIndexingWriter.java
===================================================================
RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer/XMLFileIndexingWriter.java,v
retrieving revision 1.56
retrieving revision 1.57
diff -C2 -d -r1.56 -r1.57
*** XMLFileIndexingWriter.java 20 Mar 2009 23:33:53 -0000 1.56
--- XMLFileIndexingWriter.java 16 May 2009 00:02:29 -0000 1.57
***************
*** 14,18 ****
* All rights reserved.
*/
-
package org.dlese.dpc.index.writer;
--- 14,17 ----
***************
*** 54,58 ****
private XMLIndexer _xmlIndexer = null;
private String[] _collections = null;
!
/** Constructor for the XMLFileIndexingWriter. */
--- 53,59 ----
private XMLIndexer _xmlIndexer = null;
private String[] _collections = null;
! private ResultDoc[] _myAnnoResultDocs = null;
! private boolean _itemHasRelations = false;
!
/** Constructor for the XMLFileIndexingWriter. */
***************
*** 276,280 ****
if (recordDataService != null)
vocab = recordDataService.getVocab();
!
// ------ Standard XML indexing handled by XMLIndexer ------------
--- 277,281 ----
if (recordDataService != null)
vocab = recordDataService.getVocab();
!
// ------ Standard XML indexing handled by XMLIndexer ------------
***************
*** 322,328 ****
// ------ [end] Standard XML indexing handled by XMLIndexer ------------
- prtln("Adding index fields for ID: " + getPrimaryId());
String[] collections = getCollections();
// Add my collection and collectionKey
--- 323,441 ----
// ------ [end] Standard XML indexing handled by XMLIndexer ------------
+
+ // ------ Index relations for this item ------------
+
+ ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs();
+
+ // Index the annotations as a standard relation:
+ indexRelation(myAnnoResultDocs,"isAnnotatedBy",newDoc);
+
+ // To do: Implement support for other configurable relations types...
+
+ // If one or more relations have been indexed, indicate as so:
+ if(_itemHasRelations) {
+ newDoc.add(new Field("itemhasrelations", "true", Field.Store.YES, Field.Index.UN_TOKENIZED));
+ } else {
+ newDoc.add(new Field("itemhasrelations", "false", Field.Store.YES, Field.Index.UN_TOKENIZED));
+ }
+
+ // ------ [end] Index relations for this item ------------
+ // ----------- Annotations for this item ------------------
+
+ // Note: See some related index fields applied in ItemFileIndexingWriter
+
+ // Add anno fields only available if the RecordDataService is avail:
+ if (recordDataService != null) {
+
+ String fieldContent = null;
+ List fieldList = null;
+
+ // Flag anno
+ if (myAnnoResultDocs != null && myAnnoResultDocs.length > 0)
+ newDoc.add(new Field("itemhasanno", "true", Field.Store.YES, Field.Index.TOKENIZED));
+ else
+ newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
+
+ // Anno types
+ fieldContent = "";
+ fieldList = recordDataService.getAnnoTypesFromResultDocs(myAnnoResultDocs);
+ if (fieldList != null && fieldList.size() > 0) {
+ for (int i = 0; i < fieldList.size(); i++)
+ fieldContent += ((String) fieldList.get(i)).replaceAll(" ", "+") + " ";
+ newDoc.add(new Field("itemannotypes", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno pathways
+ fieldContent = "";
+ fieldList = recordDataService.getAnnoPathwaysFromResultDocs(myAnnoResultDocs);
+ if (fieldList != null && fieldList.size() > 0) {
+ for (int i = 0; i < fieldList.size(); i++)
+ fieldContent += ((String) fieldList.get(i)).replaceAll(" ", "+") + " ";
+ newDoc.add(new Field("itemannopathways", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno collection keys, e.g. {06, 09}
+ fieldContent = "";
+ fieldList = recordDataService.getCollectionKeysFromResultDocs(myAnnoResultDocs);
+ if (fieldList != null && fieldList.size() > 0) {
+ for (int i = 0; i < fieldList.size(); i++) {
+ fieldContent += (String) fieldList.get(i);
+ if (i < (fieldList.size() - 1))
+ fieldContent += "+";
+ }
+ //prtln("itemannocollectionkeys for " + this.getId() + " is: " + fieldContent);
+ newDoc.add(new Field("itemannocollectionkeys", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno collection keys e.g. {06, 09} for those with status completed only
+ fieldContent = "";
+ ArrayList completedAnnoCollectionKeys
+ = recordDataService.getCompletedAnnoCollectionKeysFromResultDocs(myAnnoResultDocs);
+ if (completedAnnoCollectionKeys != null && completedAnnoCollectionKeys.size() > 0) {
+ for (int i = 0; i < completedAnnoCollectionKeys.size(); i++) {
+ fieldContent += (String) completedAnnoCollectionKeys.get(i);
+ if (i < (completedAnnoCollectionKeys.size() - 1))
+ fieldContent += "+";
+ }
+ //prtln("itemannocompletedcollectionkeys for " + this.getId() + " is: " + fieldContent);
+ newDoc.add(new Field("itemannocompletedcollectionkeys", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno status
+ fieldContent = "";
+ fieldList = recordDataService.getAnnoStatusFromResultDocs(myAnnoResultDocs);
+ if (fieldList != null && fieldList.size() > 0) {
+ for (int i = 0; i < fieldList.size(); i++)
+ fieldContent += ((String) fieldList.get(i)).replaceAll(" ", "+") + " ";
+ newDoc.add(new Field("itemannostatus", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno formats
+ fieldContent = "";
+ fieldList = recordDataService.getAnnoFormatsFromResultDocs(myAnnoResultDocs);
+ if (fieldList != null && fieldList.size() > 0) {
+ for (int i = 0; i < fieldList.size(); i++)
+ fieldContent += ((String) fieldList.get(i)) + " ";
+ newDoc.add(new Field("itemannoformats", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // Anno rating information and statistics
+ indexAnnoRatings(myAnnoResultDocs, newDoc);
+
+ }
+ // If no record data service, mark the item as having no annos
+ else {
+ newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ // ----------- [end] Annotations for this item ------------------
+
+
+ // ----------- Global fields for all XML records and sub-class handlers -------------
+
+ prtln("Adding index fields for ID: " + getPrimaryId());
+
String[] collections = getCollections();
// Add my collection and collectionKey
***************
*** 339,346 ****
}
}
!
DleseCollectionDocReader dleseCollectionDocReader = getMyCollectionDoc();
if (dleseCollectionDocReader != null)
! newDoc.add(new Field("myCollectionRecordIdValue", dleseCollectionDocReader.getId(), Field.Store.YES, Field.Index.NO));
newDoc.add(new Field("metadatapfx", '0' + getDocType(), Field.Store.YES, Field.Index.UN_TOKENIZED));
--- 452,467 ----
}
}
!
! // Store the ID for the collection I am a member of. (The first time the index is built, the DocReader for the 'collect' collection is not available):
! String key = getCollections()[0];
! String myCollectionRecordIdValue = null;
DleseCollectionDocReader dleseCollectionDocReader = getMyCollectionDoc();
if (dleseCollectionDocReader != null)
! myCollectionRecordIdValue = dleseCollectionDocReader.getId();
! else if(recordDataService != null && recordDataService.getCollectCollectionID() != null)
! myCollectionRecordIdValue = recordDataService.getCollectCollectionID();
! else if(key != null && key.equals("collect"))
! myCollectionRecordIdValue = "ID-FOR-COLLECT-NOT-YET-AVAILABLE";
! newDoc.add(new Field("myCollectionRecordIdValue", myCollectionRecordIdValue, Field.Store.YES, Field.Index.NO));
newDoc.add(new Field("metadatapfx", '0' + getDocType(), Field.Store.YES, Field.Index.UN_TOKENIZED));
***************
*** 455,458 ****
--- 576,654 ----
addFields(newDoc, existingDoc, sourceFile);
}
+
+ /**
+ * Indexes a relation for this item.
+ *
+ * @param relatedDocs An array of ResultDocs
+ * @param relationType The type of relationship, for example 'isAnnotatedBy'
+ * @param luceneDoc The Document to add the fields to
+ */
+ private void indexRelation(ResultDoc[] relatedDocs, String relationType, Document luceneDoc) throws Exception {
+ if(relatedDocs != null && relatedDocs.length > 0) {
+ List relatedIds = new ArrayList();
+ for(int i = 0; i < relatedDocs.length; i++) {
+ XMLDocReader xmlDocReader = (XMLDocReader)(relatedDocs[i].getDocReader());
+ // Index all xPaths for this item
+ XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig());
+ xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/");
+
+ // Index just the XPath fields:
+ xmlIndexer.indexXpathFields(luceneDoc);
+ relatedIds.add(xmlDocReader.getId());
+ }
+
+ // Index the IDs so these docs can be retrieved later:
+ for(int i = 0; i < relatedIds.size(); i++) {
+ luceneDoc.add(new Field("indexedRelationIds."+relationType, relatedIds.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ }
+
+ luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED));
+
+ _itemHasRelations = true;
+ }
+ }
+
+ /**
+ * Indexes the annotation rating information and tabulated statistics.
+ *
+ * @param annoResultDocs An array of anno ResultDocs
+ * @param newDoc The Document to add the fields to
+ */
+ private void indexAnnoRatings(ResultDoc[] annoResultDocs, Document newDoc) {
+
+ float numRatings = 0;
+ float totalRating = 0;
+ String ratings = null;
+ if (annoResultDocs != null && annoResultDocs.length > 0) {
+ ratings = "";
+ for (int i = 0; i < annoResultDocs.length; i++) {
+ String rating = ((DleseAnnoDocReader) annoResultDocs[i].getDocReader()).getRating();
+ if (rating != null && rating.length() > 0) {
+ try {
+ totalRating += Float.parseFloat(rating);
+ numRatings++;
+ ratings += rating + " ";
+ } catch (Exception nfe) {}
+ }
+ }
+ }
+
+ // The total number of ratings assigned to this resource
+ newDoc.add(new Field("itemannonumratings", new DecimalFormat("00000").format(numRatings), Field.Store.YES, Field.Index.UN_TOKENIZED));
+
+ // A String of all the ratings assigned to this resource, as numbers (e.g. '1 1 2 4 2 3')
+ if (ratings != null && ratings.length() > 0)
+ newDoc.add(new Field("itemannoratingvalues", ratings, Field.Store.YES, Field.Index.TOKENIZED));
+
+ // The average rating for this resource
+ if (numRatings > 0 || totalRating > 0) {
+ float aveRating = (totalRating / numRatings);
+
+ NumberFormat formatter = new DecimalFormat("0.000");
+ //prtln("ave rating: " + aveRating + " string: " + formatter.format(aveRating));
+ newDoc.add(new Field("itemannoaveragerating", formatter.format(aveRating), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ }
+ }
+
***************
*** 537,540 ****
--- 733,751 ----
}
+ /**
+ * Gets the annotations for this record, null or zero length if none available.
+ *
+ * @return The myAnnoResultDocs value
+ */
+ protected ResultDoc[] getMyAnnoResultDocs() throws Exception {
+ if (_myAnnoResultDocs == null) {
+ RecordDataService recordDataService = getRecordDataService();
+ if (recordDataService != null) {
+ // Get annotation for this record only. If I am a multi-doc, these (should) include all annos for all records
+ _myAnnoResultDocs = recordDataService.getDleseAnnoResultDocs(getIds());
+ }
+ }
+ return _myAnnoResultDocs;
+ }
/**
***************
*** 740,743 ****
--- 951,955 ----
collections = recordDataService.getIndex().searchDocs(q);
} catch (Throwable e) {
+ // When the index is first built, the 'collect' collection is not avaialble until it is written to the index
prtlnErr("Unable to get collection doc: " + e);
return null;
Index: DleseCollectionFileIndexingWriter.java
===================================================================
RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer/DleseCollectionFileIndexingWriter.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** DleseCollectionFileIndexingWriter.java 20 Mar 2009 23:33:53 -0000 1.27
--- DleseCollectionFileIndexingWriter.java 16 May 2009 00:02:29 -0000 1.28
***************
*** 428,432 ****
/**
! * Nothing needed.
*
* @param source The source file being indexed
--- 428,432 ----
/**
! * Performs the necessary init functions (nothing done).
*
* @param source The source file being indexed
***************
*** 486,489 ****
--- 486,494 ----
throws Exception {
+ // The first time the index is built, the 'collect' DocReader is not available, so let RecordDataService know the collect ID:
+ if(getRecordDataService() != null && getKey().equals("collect")) {
+ getRecordDataService().setCollectCollectionID(getPrimaryId());
+ }
+
addToDefaultField(getDescription());
Index: ItemFileIndexingWriter.java
===================================================================
RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer/ItemFileIndexingWriter.java,v
retrieving revision 1.63
retrieving revision 1.64
diff -C2 -d -r1.63 -r1.64
*** ItemFileIndexingWriter.java 20 Mar 2009 23:33:53 -0000 1.63
--- ItemFileIndexingWriter.java 16 May 2009 00:02:29 -0000 1.64
***************
*** 14,18 ****
* All rights reserved.
*/
-
package org.dlese.dpc.index.writer;
--- 14,17 ----
***************
*** 61,69 ****
*
*
! * @author John Weatherley
! * @see org.dlese.dpc.index.reader.ItemDocReader
! * @see org.dlese.dpc.index.reader.XMLDocReader
! * @see org.dlese.dpc.repository.RecordDataService
! * @see org.dlese.dpc.index.writer.FileIndexingServiceWriter
*/
public abstract class ItemFileIndexingWriter extends XMLFileIndexingWriter {
--- 60,68 ----
*
*
! * @author John Weatherley
! * @see org.dlese.dpc.index.reader.ItemDocReader
! * @see org.dlese.dpc.index.reader.XMLDocReader
! * @see org.dlese.dpc.repository.RecordDataService
! * @see org.dlese.dpc.index.writer.FileIndexingServiceWriter
*/
public abstract class ItemFileIndexingWriter extends XMLFileIndexingWriter {
***************
*** 82,86 ****
// Get annotation for this record only. If I am a multi-doc, these include all annos for all records
! private ResultDoc[] myAnnoResultDocs = null;
// Get ALL annotations for this resource from all records associated with the resource
--- 81,85 ----
// Get annotation for this record only. If I am a multi-doc, these include all annos for all records
! private ResultDoc[] _myAnnoResultDocs = null;
// Get ALL annotations for this resource from all records associated with the resource
***************
*** 234,238 ****
* Adds fields to the index that are unique to the given framework.<p>
*
- *
* Example code:<br>
* <code>protected void addFrameworkFields(Document newDoc, Document existingDoc) throws Exception {</code>
--- 233,236 ----
***************
*** 347,353 ****
allIds = (String[]) tmpAllIds.toArray(new String[]{});
- // Get annotation for this record only. If I am a multi-doc, these include all annos for all records
- myAnnoResultDocs = recordDataService.getDleseAnnoResultDocs(myIDs);
-
// Get ALL annotations for this resource from all records associated with the resource
// regardless of whether I am a multi-doc
--- 345,348 ----
***************
*** 361,365 ****
assocItemResultDocs = null;
allIds = null;
- myAnnoResultDocs = null;
allAnnoResultDocs = null;
}
--- 356,359 ----
***************
*** 384,387 ****
--- 378,383 ----
}
+ ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs();
+
// Use the later of my accession date and the wnDates of my annotations
if (wnDate != null && myAnnoResultDocs != null && myAnnoResultDocs.length > 0) {
***************
*** 410,413 ****
--- 406,428 ----
/**
+ * Gets the annotations for this record, null or zero length if none available. Overrides method in
+ * XMLFileIndexingWriter because IDs need initializing.
+ *
+ * @return The myAnnoResultDocs value
+ * @exception Exception If error
+ */
+ protected ResultDoc[] getMyAnnoResultDocs() throws Exception {
+ if (_myAnnoResultDocs == null) {
+ RecordDataService recordDataService = getRecordDataService();
+ if (recordDataService != null) {
+ // Get annotation for this record only. If I am a multi-doc, these (should) include all annos for all records
+ _myAnnoResultDocs = recordDataService.getDleseAnnoResultDocs(_getIds());
+ }
+ }
+ return _myAnnoResultDocs;
+ }
+
+
+ /**
* Adds fields to the index that are common to all item-level documents. These include the title,
* description, id and url as well as collection, accession status, annotation references, and
***************
*** 431,434 ****
--- 446,450 ----
List fieldList;
+ /*
// Flag anno
if (myAnnoResultDocs != null && myAnnoResultDocs.length > 0)
***************
*** 436,440 ****
else
newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
-
// Anno types
fieldContent = "";
--- 452,455 ----
***************
*** 445,449 ****
newDoc.add(new Field("itemannotypes", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
-
// Anno pathways
fieldContent = "";
--- 460,463 ----
***************
*** 454,458 ****
newDoc.add(new Field("itemannopathways", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
-
// Anno collection keys, e.g. {06, 09}
fieldContent = "";
--- 468,471 ----
***************
*** 467,471 ****
newDoc.add(new Field("itemannocollectionkeys", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
-
// Anno collection keys e.g. {06, 09} for those with status completed only
fieldContent = "";
--- 480,483 ----
***************
*** 481,485 ****
newDoc.add(new Field("itemannocompletedcollectionkeys", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
-
// Anno status
fieldContent = "";
--- 493,496 ----
***************
*** 490,494 ****
newDoc.add(new Field("itemannostatus", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
!
// Anno formats
fieldContent = "";
--- 501,505 ----
newDoc.add(new Field("itemannostatus", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
!
// Anno formats
fieldContent = "";
***************
*** 499,506 ****
newDoc.add(new Field("itemannoformats", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
-
- // Anno rating information and statistics
- indexAnnoRatings(myAnnoResultDocs, newDoc);
// Part of DRC?
/*
--- 510,516 ----
newDoc.add(new Field("itemannoformats", fieldContent, Field.Store.YES, Field.Index.TOKENIZED));
}
+ // Anno rating information and statistics
+ indexAnnoRatings(myAnnoResultDocs, newDoc); */
// Part of DRC?
/*
***************
*** 508,512 ****
A. One or more ADN records that catalog the resource comes from a DRC
collection (as indicated by it's collection record)
!
B. The logic for determining whether an annotated resources is in the
DRC is the following:
--- 518,522 ----
A. One or more ADN records that catalog the resource comes from a DRC
collection (as indicated by it's collection record)
!
B. The logic for determining whether an annotated resources is in the
DRC is the following:
***************
*** 522,526 ****
if (collDoc.isPartOfDRC() ||
recordDataService.hasDRCItem(assocItemResultDocs) ||
! recordDataService.hasDRCAnnotation(myAnnoResultDocs))
partOfDrc = "true";
else
--- 532,536 ----
if (collDoc.isPartOfDRC() ||
recordDataService.hasDRCItem(assocItemResultDocs) ||
! recordDataService.hasDRCAnnotation(getMyAnnoResultDocs()))
partOfDrc = "true";
else
***************
*** 551,554 ****
--- 561,566 ----
String myCollectionKey = getCollections()[0];
myCollectionKey = getFieldContent(myCollectionKey, "key", "dlese_collect");
+ ArrayList completedAnnoCollectionKeys
+ = recordDataService.getCompletedAnnoCollectionKeysFromResultDocs(getMyAnnoResultDocs());
String associatedcollectionkeys = "";
fieldList = recordDataService.getCollectionKeysFromResultDocs(assocItemResultDocs);
***************
*** 609,615 ****
}
! // If no record data service, mark the item as having no annos or associated ids
else {
! newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
newDoc.add(new Field("hasassociatedids", "false", Field.Store.YES, Field.Index.TOKENIZED));
DleseCollectionDocReader myCollectionDoc = getMyCollectionDoc();
--- 621,627 ----
}
! // If no record data service, mark the item as having associated ids
else {
! /* newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED)); */
newDoc.add(new Field("hasassociatedids", "false", Field.Store.YES, Field.Index.TOKENIZED));
DleseCollectionDocReader myCollectionDoc = getMyCollectionDoc();
***************
*** 702,714 ****
// The total number of ratings assigned to this resource
newDoc.add(new Field("itemannonumratings", new DecimalFormat("00000").format(numRatings), Field.Store.YES, Field.Index.UN_TOKENIZED));
!
// A String of all the ratings assigned to this resource, as numbers (e.g. '1 1 2 4 2 3')
! if(ratings!= null && ratings.length() > 0)
! newDoc.add(new Field("itemannoratingvalues", ratings, Field.Store.YES, Field.Index.TOKENIZED));
!
// The average rating for this resource
if (numRatings > 0 || totalRating > 0) {
float aveRating = (totalRating / numRatings);
!
NumberFormat formatter = new DecimalFormat("0.000");
//prtln("ave rating: " + aveRating + " string: " + formatter.format(aveRating));
--- 714,726 ----
// The total number of ratings assigned to this resource
newDoc.add(new Field("itemannonumratings", new DecimalFormat("00000").format(numRatings), Field.Store.YES, Field.Index.UN_TOKENIZED));
!
// A String of all the ratings assigned to this resource, as numbers (e.g. '1 1 2 4 2 3')
! if (ratings != null && ratings.length() > 0)
! newDoc.add(new Field("itemannoratingvalues", ratings, Field.Store.YES, Field.Index.TOKENIZED));
!
// The average rating for this resource
if (numRatings > 0 || totalRating > 0) {
float aveRating = (totalRating / numRatings);
!
NumberFormat formatter = new DecimalFormat("0.000");
//prtln("ave rating: " + aveRating + " string: " + formatter.format(aveRating));
***************
*** 820,824 ****
}
-
}
--- 832,835 ----
|