[Dlsciences-dlese-tools] dlese-tools-project/src/org/dlese/dpc/index/writer XMLFileIndexingWriter.

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer
In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv2387/src/org/dlese/dpc/index/writer

Modified Files:
	XMLFileIndexingWriter.java 
Log Message:
-implemented ability to assign arbitrary relations (isAnnotatedBy, standardProvidedBy, isPartOfList, etc.) to/from any XML framework via config

Index: XMLFileIndexingWriter.java
===================================================================
RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer/XMLFileIndexingWriter.java,v
retrieving revision 1.62
retrieving revision 1.63
diff -C2 -d -r1.62 -r1.63
*** XMLFileIndexingWriter.java	26 May 2010 23:21:01 -0000	1.62
--- XMLFileIndexingWriter.java	12 Jun 2010 00:15:23 -0000	1.63
***************
*** 23,26 ****
--- 23,27 ----
  import org.apache.lucene.search.*;
  import org.apache.lucene.index.*;
+ import org.apache.lucene.analysis.KeywordAnalyzer;
  
  import org.dlese.dpc.xml.*;
***************
*** 54,59 ****
  	private String[] _collections = null;
  	private ResultDoc[] _myAnnoResultDocs = null;
- 	private boolean _itemHasRelations = false;
- 	
  
  	/**  Constructor for the XMLFileIndexingWriter. */
--- 55,58 ----
***************
*** 90,93 ****
--- 89,142 ----
  
  	/**
+ 	 *  Gets the ids of related records.
+ 	 *
+ 	 * @return                            The related ids value, or null if none
+ 	 * @exception  IllegalStateException  If called prior to calling method #indexFields
+ 	 * @exception  Exception              If error
+ 	 */
+ 	public List getRelatedIds() throws IllegalStateException, Exception {
+ 		return getXmlIndexer().getRelatedIds();
+ 	}
+ 
+ 
+ 	/**
+ 	 *  Gets the urls of related records.
+ 	 *
+ 	 * @return                            The related urls value, or null if none
+ 	 * @exception  IllegalStateException  If called prior to calling method #indexFields
+ 	 * @exception  Exception              If error
+ 	 */
+ 	public List getRelatedUrls() throws IllegalStateException, Exception {
+ 		return getXmlIndexer().getRelatedUrls();
+ 	}
+ 
+ 	
+ 	/**
+ 	 *  Gets the ids of related records. The Map key contains the relationship (isAnnotatedBy, etc.) and the Map
+ 	 *  value contains a List of Strings that indicate the ids of the target records.
+ 	 *
+ 	 * @return                            The related ids value, or null if none
+ 	 * @exception  IllegalStateException  If called prior to calling method #indexFields
+ 	 * @exception  Exception              If error
+ 	 */
+ 	public Map getRelatedIdsMap() throws IllegalStateException, Exception {
+ 		return getXmlIndexer().getRelatedIdsMap();
+ 	}
+ 
+ 
+ 	/**
+ 	 *  Gets the urls of related records. The Map key contains the relationship (isAnnotatedBy, etc.) and the Map
+ 	 *  value contains a List of Strings that indicate the urls of the target records.
+ 	 *
+ 	 * @return                            The related urls value, or null if none
+ 	 * @exception  IllegalStateException  If called prior to calling method #indexFields
+ 	 * @exception  Exception              If error
+ 	 */
+ 	public Map getRelatedUrlsMap() throws IllegalStateException, Exception {
+ 		return getXmlIndexer().getRelatedUrlsMap();
+ 	}	
+ 	
+ 
+ 	/**
  	 *  Returns unique collection keys for the item being indexed. For example "dcc" (single collection) or "dcc
  	 *  dwel" (multiple collections). If more than one collection is provided, the first one must be the primary
***************
*** 320,362 ****
  			}
  		}
! 		
! 		// Index the related IDs/urls for this item:
! 		Map relationMap = xmlIndexer.getRelations();
! 		if(relationMap != null){
! 			Iterator it = relationMap.keySet().iterator();
  			while (it.hasNext()) {
! 				String relationshipName = (String)it.next();
! 				List ids = (List)relationMap.get(relationshipName);
! 				//prtln("processing relation: relationshipName: " + relationshipName + " ids: " + Arrays.toString(ids.toArray()));
  			}
  		}
  
  		// ------ [end] Standard XML indexing handled by XMLIndexer ------------
  
! 		
! 		// ------ Index relations for this item ------------
! 		
! 		ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs();
! 		
  		// Index the annotations as a standard relation:
! 		indexRelation(myAnnoResultDocs,"isAnnotatedBy",newDoc);
! 		
! 		// To do: Implement support for other configurable relations types...		
  
- 		// If one or more relations have been indexed, indicate as so:
- 		if(_itemHasRelations) {
- 			newDoc.add(new Field("itemhasrelations", "true", Field.Store.YES, Field.Index.UN_TOKENIZED));
- 		} else {
- 			newDoc.add(new Field("itemhasrelations", "false", Field.Store.YES, Field.Index.UN_TOKENIZED));
- 		}
- 		
- 		
  		// ------ [end] Index relations for this item ------------
  
! 		
  		// ----------- Annotations for this item ------------------
! 		
  		// Note: See some related index fields applied in ItemFileIndexingWriter
! 		
  		// Add anno fields only available if the RecordDataService is avail:
  		if (recordDataService != null) {
--- 369,448 ----
  			}
  		}
! 
! 		// --- Index things this item relates to (is an annotation for, etc), e.g. isRelatedTo:
! 
! 		boolean itemAssignsRelationships = false;
! 
! 		// Index the related IDs for this item:
! 		Map relatedIdsMap = xmlIndexer.getRelatedIdsMap();
! 		prtln("xmlIndexer.getRelatedIds()");
! 		if (relatedIdsMap != null) {
! 			prtln("xmlIndexer.getRelatedIds() has some!");
! 			Iterator it = relatedIdsMap.keySet().iterator();
  			while (it.hasNext()) {
! 				String relationshipName = (String) it.next();
! 				List ids = (List) relatedIdsMap.get(relationshipName);
! 				//prtln("processing id relation: relationshipName: " + relationshipName + " ids: " + Arrays.toString(ids.toArray()));
! 
! 				// Index the IDs so these docs can be retrieved later:
! 				for (int i = 0; i < ids.size(); i++) {
! 					itemAssignsRelationships = true;
! 					//newDoc.add(new Field("indexedRelationIds.isRelatedTo", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 					newDoc.add(new Field("assignsRelationshipById." + relationshipName, ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 					newDoc.add(new Field("assignsRelationshipById", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 				}
! 				//newDoc.add(new Field("indexedRelations", "isRelatedTo", Field.Store.YES, Field.Index.UN_TOKENIZED));
! 				newDoc.add(new Field("assignedRelationshipsById", relationshipName, Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			}
! 		}
! 
! 		// Index the related urls for this item:
! 		Map relatedUrlsMap = xmlIndexer.getRelatedUrlsMap();
! 		if (relatedUrlsMap != null) {
! 			Iterator it = relatedUrlsMap.keySet().iterator();
! 			while (it.hasNext()) {
! 				String relationshipName = (String) it.next();
! 				List urls = (List) relatedUrlsMap.get(relationshipName);
! 				//prtln("processing url relation: relationshipName: " + relationshipName + " urls: " + Arrays.toString(ids.toArray()));
! 
! 				// Index the IDs so these docs can be retrieved later:
! 				for (int i = 0; i < urls.size(); i++) {
! 					itemAssignsRelationships = true;
! 					//newDoc.add(new Field("indexedRelationIds.isRelatedTo", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 					newDoc.add(new Field("assignsRelationshipByUrl." + relationshipName, urls.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 					newDoc.add(new Field("assignsRelationshipByUrl", urls.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 				}
! 				//newDoc.add(new Field("indexedRelations", "isRelatedTo", Field.Store.YES, Field.Index.UN_TOKENIZED));
! 				newDoc.add(new Field("assignedRelationshipsByUrl", relationshipName, Field.Store.YES, Field.Index.UN_TOKENIZED));
  			}
  		}
  
+ 		// Mark if this item assigns a relationship:
+ 		newDoc.add(new Field("assignedRelationshipIsDefined", (itemAssignsRelationships ? "true" : "false"), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ 
  		// ------ [end] Standard XML indexing handled by XMLIndexer ------------
  
! 
! 		// ------ Index relations for this item (records that have a relation to this (isAnnotatedBy, etc.) ------------
! 
! 		// The new way to handle relations....
! 		indexRelations(newDoc);
! 
  		// Index the annotations as a standard relation:
! 		//indexRelation(myAnnoResultDocs,"isAnnotatedBy",newDoc);
! 
! 		// To do: Implement support for other configurable relations types...
! 
! 
  
  		// ------ [end] Index relations for this item ------------
  
! 
  		// ----------- Annotations for this item ------------------
! 
  		// Note: See some related index fields applied in ItemFileIndexingWriter
! 
! 		ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs();
! 
  		// Add anno fields only available if the RecordDataService is avail:
  		if (recordDataService != null) {
***************
*** 442,448 ****
  			newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
  		}
- 		
- 		// ----------- [end] Annotations for this item ------------------
  
  
  		// ----------- Global fields for all XML records and sub-class handlers -------------
--- 528,533 ----
  			newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED));
  		}
  
+ 		// ----------- [end] Annotations for this item ------------------
  
  		// ----------- Global fields for all XML records and sub-class handlers -------------
***************
*** 464,468 ****
  			}
  		}
! 		
  		// Store the ID for the collection I am a member of. (The first time the index is built, the DocReader for the 'collect' collection is not available):
  		String key = getCollections()[0];
--- 549,553 ----
  			}
  		}
! 
  		// Store the ID for the collection I am a member of. (The first time the index is built, the DocReader for the 'collect' collection is not available):
  		String key = getCollections()[0];
***************
*** 471,483 ****
  		if (dleseCollectionDocReader != null)
  			myCollectionRecordIdValue = dleseCollectionDocReader.getId();
! 		else if(recordDataService != null && recordDataService.getCollectCollectionID() != null)
  			myCollectionRecordIdValue = recordDataService.getCollectCollectionID();
! 		else if(key != null && key.equals("collect"))
  			myCollectionRecordIdValue = "ID-FOR-COLLECT-NOT-YET-AVAILABLE";
! 		
  		// If no collection info (such as jOAI).
! 		if(myCollectionRecordIdValue == null)
  			myCollectionRecordIdValue = "COLLECTION-ID-NOT-AVAILABLE";
! 		
  		newDoc.add(new Field("myCollectionRecordIdValue", myCollectionRecordIdValue, Field.Store.YES, Field.Index.NO));
  
--- 556,568 ----
  		if (dleseCollectionDocReader != null)
  			myCollectionRecordIdValue = dleseCollectionDocReader.getId();
! 		else if (recordDataService != null && recordDataService.getCollectCollectionID() != null)
  			myCollectionRecordIdValue = recordDataService.getCollectCollectionID();
! 		else if (key != null && key.equals("collect"))
  			myCollectionRecordIdValue = "ID-FOR-COLLECT-NOT-YET-AVAILABLE";
! 
  		// If no collection info (such as jOAI).
! 		if (myCollectionRecordIdValue == null)
  			myCollectionRecordIdValue = "COLLECTION-ID-NOT-AVAILABLE";
! 
  		newDoc.add(new Field("myCollectionRecordIdValue", myCollectionRecordIdValue, Field.Store.YES, Field.Index.NO));
  
***************
*** 593,628 ****
  		addFields(newDoc, existingDoc, sourceFile);
  	}
! 	
  	/**
  	 *  Indexes a relation for this item.
  	 *
! 	 * @param  relatedDocs  	An array of ResultDocs that contain the records that are related to this one
! 	 * @param  relationType  	The type of relationship, for example 'isAnnotatedBy'	 
! 	 * @param  luceneDoc		The Document to add the fields to
! 	 */	
! 	private void indexRelation(ResultDoc[] relatedDocs, String relationType, Document luceneDoc) throws Exception {	
! 		if(relatedDocs != null && relatedDocs.length > 0) {
  			List relatedIds = new ArrayList();
! 			for(int i = 0; i < relatedDocs.length; i++) {
! 				XMLDocReader xmlDocReader = (XMLDocReader)(relatedDocs[i].getDocReader());
  				// Index all xPaths for this item
! 				XMLIndexer xmlIndexer =  new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig());
  				xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/");
! 				
  				// Index just the XPath fields:
  				xmlIndexer.indexXpathFields(luceneDoc);
  				relatedIds.add(xmlDocReader.getId());
  			}
! 			
  			// Index the IDs so these docs can be retrieved later:
! 			for(int i = 0; i < relatedIds.size(); i++) {
! 				luceneDoc.add(new Field("indexedRelationIds."+relationType, relatedIds.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			}			
! 		
  			luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			
! 			_itemHasRelations = true;
  		}
! 	}
  
  	/**
--- 678,793 ----
  		addFields(newDoc, existingDoc, sourceFile);
  	}
! 
! 
! 	private void indexRelations(Document luceneDoc) throws Exception {
! 
! 		// Get all the records that are related to me:
! 		String[] myIds = getIds();
! 		String[] myUrls = getUrls();
! 		if ( ((myIds == null || myIds.length == 0) && (myUrls == null || myUrls.length == 0)) || getIndex() == null) {
! 			return;
! 		}
! 		try {
! 			
! 			BooleanQuery idQ = new BooleanQuery();
! 			if(myIds != null) {
! 				for (int i = 0; i < myIds.length; i++)
! 					idQ.add(new TermQuery(new Term("assignsRelationshipById", myIds[i])), BooleanClause.Occur.SHOULD);
! 			}
! 			
! 			if(myUrls != null) {
! 				for (int i = 0; i < myUrls.length; i++)
! 					idQ.add(new TermQuery(new Term("assignsRelationshipByUrl", myUrls[i])), BooleanClause.Occur.SHOULD);
! 			}
! 			
! 			ResultDoc[] relatedDocs =
! 				getIndex().searchDocs(idQ);
! 			if (relatedDocs == null || relatedDocs.length == 0) {
! 				prtln("indexRelations(): " + idQ + " num: 0");
! 				return;
! 			}
! 			else {
! 				prtln("indexRelations(): " + idQ + " num: " + relatedDocs.length);
! 				//boolean itemHasRelations = false;
! 
! 				//Index my relations...
! 				//List relatedIds = new ArrayList();
! 				for (int i = 0; i < relatedDocs.length; i++) {
! 					XMLDocReader xmlDocReader = (XMLDocReader) (relatedDocs[i].getDocReader());
! 					
! 					// Get the list of relations assigned for this
! 					List myRelationTypes = (List)xmlDocReader.getAssignedRelationshipsForItemsMap().get(this.getPrimaryId());
! 					
! 					if(myRelationTypes != null) {
! 						for(int j = 0; j < myRelationTypes.size(); j++) { 
! 							String relationType = (String)myRelationTypes.get(j);
! 		
! 							// Index all xPaths for this item
! 							XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig());
! 							xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/");
! 		
! 							// Index just the XPath fields:
! 							xmlIndexer.indexXpathFields(luceneDoc);
! 							//relatedIds.add(xmlDocReader.getId());
! 							
! 							// Index the IDs so these docs can be retrieved later:
! 							luceneDoc.add(new Field("indexedRelationIds." + relationType, xmlDocReader.getId(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 							luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED));
! 						}
! 					}
! 				}
! 			}
! 			
! 			// If one or more relations have been indexed, indicate as so:
! 			/* if (itemHasRelations) {
! 				newDoc.add(new Field("itemhasrelations", "true", Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			}
! 			else {
! 				newDoc.add(new Field("itemhasrelations", "false", Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			}	 */		
! 			
! 		} catch (Throwable e) {
! 			prtlnErr("indexRelations(): " + e);
! 			e.printStackTrace();
! 			return;
! 		}
! 	}
! 
! 
  	/**
  	 *  Indexes a relation for this item.
  	 *
! 	 * @param  relatedDocs    An array of ResultDocs that contain the records that are related to this one
! 	 * @param  luceneDoc      The Document to add the fields to
! 	 * @exception  Exception  NOT YET DOCUMENTED
! 	 */
! /* 	private void indexRelationZZZ(ResultDoc[] relatedDocs, Document luceneDoc) throws Exception {
! 		if (relatedDocs != null && relatedDocs.length > 0) {
  			List relatedIds = new ArrayList();
! 			for (int i = 0; i < relatedDocs.length; i++) {
! 				XMLDocReader xmlDocReader = (XMLDocReader) (relatedDocs[i].getDocReader());
! 
! 				String relationType = "isAnnotatedBy";
! 
  				// Index all xPaths for this item
! 				XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig());
  				xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/");
! 
  				// Index just the XPath fields:
  				xmlIndexer.indexXpathFields(luceneDoc);
  				relatedIds.add(xmlDocReader.getId());
  			}
! 
  			// Index the IDs so these docs can be retrieved later:
! 			for (int i = 0; i < relatedIds.size(); i++) {
! 				luceneDoc.add(new Field("indexedRelationIds." + relationType, relatedIds.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
! 			}
! 
  			luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED));
! 
! 			itemHasRelations = true;
  		}
! 	} */
! 
  
  	/**
***************
*** 750,757 ****
  	}
  
  	/**
  	 *  Gets the annotations for this record, null or zero length if none available.
  	 *
! 	 * @return    The myAnnoResultDocs value
  	 */
  	protected ResultDoc[] getMyAnnoResultDocs() throws Exception {
--- 915,924 ----
  	}
  
+ 
  	/**
  	 *  Gets the annotations for this record, null or zero length if none available.
  	 *
! 	 * @return                The myAnnoResultDocs value
! 	 * @exception  Exception  NOT YET DOCUMENTED
  	 */
  	protected ResultDoc[] getMyAnnoResultDocs() throws Exception {
***************
*** 766,769 ****
--- 933,937 ----
  	}
  
+ 
  	/**
  	 *  Gets the XMLIndexerFieldsConfig to use for XML indexing, or null if none available.
***************
*** 957,961 ****
  		if (myCollectionDocReader == null) {
  			RecordDataService recordDataService = getRecordDataService();
! 			
  			// RecordDataService is not available in OAI app:
  			if (recordDataService == null) {
--- 1125,1129 ----
  		if (myCollectionDocReader == null) {
  			RecordDataService recordDataService = getRecordDataService();
! 
  			// RecordDataService is not available in OAI app:
  			if (recordDataService == null) {

[Dlsciences-dlese-tools] dlese-tools-project/src/org/dlese/dpc/index/writer XMLFileIndexingWriter.

[Dlsciences-dlese-tools] dlese-tools-project/src/org/dlese/dpc/index/writer XMLFileIndexingWriter.java, 1.62, 1.63,