From: John W. <jwe...@us...> - 2010-06-12 00:15:54
|
Update of /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv2387/src/org/dlese/dpc/index/writer Modified Files: XMLFileIndexingWriter.java Log Message: -implemented ability to assign arbitrary relations (isAnnotatedBy, standardProvidedBy, isPartOfList, etc.) to/from any XML framework via config Index: XMLFileIndexingWriter.java =================================================================== RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/index/writer/XMLFileIndexingWriter.java,v retrieving revision 1.62 retrieving revision 1.63 diff -C2 -d -r1.62 -r1.63 *** XMLFileIndexingWriter.java 26 May 2010 23:21:01 -0000 1.62 --- XMLFileIndexingWriter.java 12 Jun 2010 00:15:23 -0000 1.63 *************** *** 23,26 **** --- 23,27 ---- import org.apache.lucene.search.*; import org.apache.lucene.index.*; + import org.apache.lucene.analysis.KeywordAnalyzer; import org.dlese.dpc.xml.*; *************** *** 54,59 **** private String[] _collections = null; private ResultDoc[] _myAnnoResultDocs = null; - private boolean _itemHasRelations = false; - /** Constructor for the XMLFileIndexingWriter. */ --- 55,58 ---- *************** *** 90,93 **** --- 89,142 ---- /** + * Gets the ids of related records. + * + * @return The related ids value, or null if none + * @exception IllegalStateException If called prior to calling method #indexFields + * @exception Exception If error + */ + public List getRelatedIds() throws IllegalStateException, Exception { + return getXmlIndexer().getRelatedIds(); + } + + + /** + * Gets the urls of related records. + * + * @return The related urls value, or null if none + * @exception IllegalStateException If called prior to calling method #indexFields + * @exception Exception If error + */ + public List getRelatedUrls() throws IllegalStateException, Exception { + return getXmlIndexer().getRelatedUrls(); + } + + + /** + * Gets the ids of related records. 
The Map key contains the relationship (isAnnotatedBy, etc.) and the Map + * value contains a List of Strings that indicate the ids of the target records. + * + * @return The related ids value, or null if none + * @exception IllegalStateException If called prior to calling method #indexFields + * @exception Exception If error + */ + public Map getRelatedIdsMap() throws IllegalStateException, Exception { + return getXmlIndexer().getRelatedIdsMap(); + } + + + /** + * Gets the urls of related records. The Map key contains the relationship (isAnnotatedBy, etc.) and the Map + * value contains a List of Strings that indicate the urls of the target records. + * + * @return The related urls value, or null if none + * @exception IllegalStateException If called prior to calling method #indexFields + * @exception Exception If error + */ + public Map getRelatedUrlsMap() throws IllegalStateException, Exception { + return getXmlIndexer().getRelatedUrlsMap(); + } + + + /** * Returns unique collection keys for the item being indexed. For example "dcc" (single collection) or "dcc * dwel" (multiple collections). If more than one collection is provided, the first one must be the primary *************** *** 320,362 **** } } ! ! // Index the related IDs/urls for this item: ! Map relationMap = xmlIndexer.getRelations(); ! if(relationMap != null){ ! Iterator it = relationMap.keySet().iterator(); while (it.hasNext()) { ! String relationshipName = (String)it.next(); ! List ids = (List)relationMap.get(relationshipName); ! //prtln("processing relation: relationshipName: " + relationshipName + " ids: " + Arrays.toString(ids.toArray())); } } // ------ [end] Standard XML indexing handled by XMLIndexer ------------ ! ! // ------ Index relations for this item ------------ ! ! ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs(); ! // Index the annotations as a standard relation: ! indexRelation(myAnnoResultDocs,"isAnnotatedBy",newDoc); ! ! 
// To do: Implement support for other configurable relations types... - // If one or more relations have been indexed, indicate as so: - if(_itemHasRelations) { - newDoc.add(new Field("itemhasrelations", "true", Field.Store.YES, Field.Index.UN_TOKENIZED)); - } else { - newDoc.add(new Field("itemhasrelations", "false", Field.Store.YES, Field.Index.UN_TOKENIZED)); - } - - // ------ [end] Index relations for this item ------------ ! // ----------- Annotations for this item ------------------ ! // Note: See some related index fields applied in ItemFileIndexingWriter ! // Add anno fields only available if the RecordDataService is avail: if (recordDataService != null) { --- 369,448 ---- } } ! ! // --- Index things this item relates to (is an annotation for, etc), e.g. isRelatedTo: ! ! boolean itemAssignsRelationships = false; ! ! // Index the related IDs for this item: ! Map relatedIdsMap = xmlIndexer.getRelatedIdsMap(); ! prtln("xmlIndexer.getRelatedIds()"); ! if (relatedIdsMap != null) { ! prtln("xmlIndexer.getRelatedIds() has some!"); ! Iterator it = relatedIdsMap.keySet().iterator(); while (it.hasNext()) { ! String relationshipName = (String) it.next(); ! List ids = (List) relatedIdsMap.get(relationshipName); ! //prtln("processing id relation: relationshipName: " + relationshipName + " ids: " + Arrays.toString(ids.toArray())); ! ! // Index the IDs so these docs can be retrieved later: ! for (int i = 0; i < ids.size(); i++) { ! itemAssignsRelationships = true; ! //newDoc.add(new Field("indexedRelationIds.isRelatedTo", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! newDoc.add(new Field("assignsRelationshipById." + relationshipName, ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! newDoc.add(new Field("assignsRelationshipById", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! //newDoc.add(new Field("indexedRelations", "isRelatedTo", Field.Store.YES, Field.Index.UN_TOKENIZED)); ! 
newDoc.add(new Field("assignedRelationshipsById", relationshipName, Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! } ! ! // Index the related urls for this item: ! Map relatedUrlsMap = xmlIndexer.getRelatedUrlsMap(); ! if (relatedUrlsMap != null) { ! Iterator it = relatedUrlsMap.keySet().iterator(); ! while (it.hasNext()) { ! String relationshipName = (String) it.next(); ! List urls = (List) relatedUrlsMap.get(relationshipName); ! //prtln("processing url relation: relationshipName: " + relationshipName + " urls: " + Arrays.toString(ids.toArray())); ! ! // Index the IDs so these docs can be retrieved later: ! for (int i = 0; i < urls.size(); i++) { ! itemAssignsRelationships = true; ! //newDoc.add(new Field("indexedRelationIds.isRelatedTo", ids.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! newDoc.add(new Field("assignsRelationshipByUrl." + relationshipName, urls.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! newDoc.add(new Field("assignsRelationshipByUrl", urls.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! //newDoc.add(new Field("indexedRelations", "isRelatedTo", Field.Store.YES, Field.Index.UN_TOKENIZED)); ! newDoc.add(new Field("assignedRelationshipsByUrl", relationshipName, Field.Store.YES, Field.Index.UN_TOKENIZED)); } } + // Mark if this item assigns a relationship: + newDoc.add(new Field("assignedRelationshipIsDefined", (itemAssignsRelationships ? "true" : "false"), Field.Store.YES, Field.Index.UN_TOKENIZED)); + // ------ [end] Standard XML indexing handled by XMLIndexer ------------ ! ! // ------ Index relations for this item (records that have a relation to this (isAnnotatedBy, etc.) ------------ ! ! // The new way to handle relations.... ! indexRelations(newDoc); ! // Index the annotations as a standard relation: ! //indexRelation(myAnnoResultDocs,"isAnnotatedBy",newDoc); ! ! // To do: Implement support for other configurable relations types... ! ! 
// ------ [end] Index relations for this item ------------ ! // ----------- Annotations for this item ------------------ ! // Note: See some related index fields applied in ItemFileIndexingWriter ! ! ResultDoc[] myAnnoResultDocs = getMyAnnoResultDocs(); ! // Add anno fields only available if the RecordDataService is avail: if (recordDataService != null) { *************** *** 442,448 **** newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED)); } - - // ----------- [end] Annotations for this item ------------------ // ----------- Global fields for all XML records and sub-class handlers ------------- --- 528,533 ---- newDoc.add(new Field("itemhasanno", "false", Field.Store.YES, Field.Index.TOKENIZED)); } + // ----------- [end] Annotations for this item ------------------ // ----------- Global fields for all XML records and sub-class handlers ------------- *************** *** 464,468 **** } } ! // Store the ID for the collection I am a member of. (The first time the index is built, the DocReader for the 'collect' collection is not available): String key = getCollections()[0]; --- 549,553 ---- } } ! // Store the ID for the collection I am a member of. (The first time the index is built, the DocReader for the 'collect' collection is not available): String key = getCollections()[0]; *************** *** 471,483 **** if (dleseCollectionDocReader != null) myCollectionRecordIdValue = dleseCollectionDocReader.getId(); ! else if(recordDataService != null && recordDataService.getCollectCollectionID() != null) myCollectionRecordIdValue = recordDataService.getCollectCollectionID(); ! else if(key != null && key.equals("collect")) myCollectionRecordIdValue = "ID-FOR-COLLECT-NOT-YET-AVAILABLE"; ! // If no collection info (such as jOAI). ! if(myCollectionRecordIdValue == null) myCollectionRecordIdValue = "COLLECTION-ID-NOT-AVAILABLE"; ! 
newDoc.add(new Field("myCollectionRecordIdValue", myCollectionRecordIdValue, Field.Store.YES, Field.Index.NO)); --- 556,568 ---- if (dleseCollectionDocReader != null) myCollectionRecordIdValue = dleseCollectionDocReader.getId(); ! else if (recordDataService != null && recordDataService.getCollectCollectionID() != null) myCollectionRecordIdValue = recordDataService.getCollectCollectionID(); ! else if (key != null && key.equals("collect")) myCollectionRecordIdValue = "ID-FOR-COLLECT-NOT-YET-AVAILABLE"; ! // If no collection info (such as jOAI). ! if (myCollectionRecordIdValue == null) myCollectionRecordIdValue = "COLLECTION-ID-NOT-AVAILABLE"; ! newDoc.add(new Field("myCollectionRecordIdValue", myCollectionRecordIdValue, Field.Store.YES, Field.Index.NO)); *************** *** 593,628 **** addFields(newDoc, existingDoc, sourceFile); } ! /** * Indexes a relation for this item. * ! * @param relatedDocs An array of ResultDocs that contain the records that are related to this one ! * @param relationType The type of relationship, for example 'isAnnotatedBy' ! * @param luceneDoc The Document to add the fields to ! */ ! private void indexRelation(ResultDoc[] relatedDocs, String relationType, Document luceneDoc) throws Exception { ! if(relatedDocs != null && relatedDocs.length > 0) { List relatedIds = new ArrayList(); ! for(int i = 0; i < relatedDocs.length; i++) { ! XMLDocReader xmlDocReader = (XMLDocReader)(relatedDocs[i].getDocReader()); // Index all xPaths for this item ! XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig()); xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/"); ! // Index just the XPath fields: xmlIndexer.indexXpathFields(luceneDoc); relatedIds.add(xmlDocReader.getId()); } ! // Index the IDs so these docs can be retrieved later: ! for(int i = 0; i < relatedIds.size(); i++) { ! 
luceneDoc.add(new Field("indexedRelationIds."+relationType, relatedIds.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED)); ! ! _itemHasRelations = true; } ! } /** --- 678,793 ---- addFields(newDoc, existingDoc, sourceFile); } ! ! ! private void indexRelations(Document luceneDoc) throws Exception { ! ! // Get all the records that are related to me: ! String[] myIds = getIds(); ! String[] myUrls = getUrls(); ! if ( ((myIds == null || myIds.length == 0) && (myUrls == null || myUrls.length == 0)) || getIndex() == null) { ! return; ! } ! try { ! ! BooleanQuery idQ = new BooleanQuery(); ! if(myIds != null) { ! for (int i = 0; i < myIds.length; i++) ! idQ.add(new TermQuery(new Term("assignsRelationshipById", myIds[i])), BooleanClause.Occur.SHOULD); ! } ! ! if(myUrls != null) { ! for (int i = 0; i < myUrls.length; i++) ! idQ.add(new TermQuery(new Term("assignsRelationshipByUrl", myUrls[i])), BooleanClause.Occur.SHOULD); ! } ! ! ResultDoc[] relatedDocs = ! getIndex().searchDocs(idQ); ! if (relatedDocs == null || relatedDocs.length == 0) { ! prtln("indexRelations(): " + idQ + " num: 0"); ! return; ! } ! else { ! prtln("indexRelations(): " + idQ + " num: " + relatedDocs.length); ! //boolean itemHasRelations = false; ! ! //Index my relations... ! //List relatedIds = new ArrayList(); ! for (int i = 0; i < relatedDocs.length; i++) { ! XMLDocReader xmlDocReader = (XMLDocReader) (relatedDocs[i].getDocReader()); ! ! // Get the list of relations assigned for this ! List myRelationTypes = (List)xmlDocReader.getAssignedRelationshipsForItemsMap().get(this.getPrimaryId()); ! ! if(myRelationTypes != null) { ! for(int j = 0; j < myRelationTypes.size(); j++) { ! String relationType = (String)myRelationTypes.get(j); ! ! // Index all xPaths for this item ! 
XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig()); ! xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/"); ! ! // Index just the XPath fields: ! xmlIndexer.indexXpathFields(luceneDoc); ! //relatedIds.add(xmlDocReader.getId()); ! ! // Index the IDs so these docs can be retrieved later: ! luceneDoc.add(new Field("indexedRelationIds." + relationType, xmlDocReader.getId(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! } ! } ! } ! ! // If one or more relations have been indexed, indicate as so: ! /* if (itemHasRelations) { ! newDoc.add(new Field("itemhasrelations", "true", Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! else { ! newDoc.add(new Field("itemhasrelations", "false", Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } */ ! ! } catch (Throwable e) { ! prtlnErr("indexRelations(): " + e); ! e.printStackTrace(); ! return; ! } ! } ! ! /** * Indexes a relation for this item. * ! * @param relatedDocs An array of ResultDocs that contain the records that are related to this one ! * @param luceneDoc The Document to add the fields to ! * @exception Exception NOT YET DOCUMENTED ! */ ! /* private void indexRelationZZZ(ResultDoc[] relatedDocs, Document luceneDoc) throws Exception { ! if (relatedDocs != null && relatedDocs.length > 0) { List relatedIds = new ArrayList(); ! for (int i = 0; i < relatedDocs.length; i++) { ! XMLDocReader xmlDocReader = (XMLDocReader) (relatedDocs[i].getDocReader()); ! ! String relationType = "isAnnotatedBy"; ! // Index all xPaths for this item ! XMLIndexer xmlIndexer = new XMLIndexer(xmlDocReader.getXml(), xmlDocReader.getDoctype(), getXmlIndexerFieldsConfig()); xmlIndexer.setXPathFieldsPrefix("/relation." + relationType + "/"); ! // Index just the XPath fields: xmlIndexer.indexXpathFields(luceneDoc); relatedIds.add(xmlDocReader.getId()); } ! 
// Index the IDs so these docs can be retrieved later: ! for (int i = 0; i < relatedIds.size(); i++) { ! luceneDoc.add(new Field("indexedRelationIds." + relationType, relatedIds.get(i).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); ! } ! luceneDoc.add(new Field("indexedRelations", relationType, Field.Store.YES, Field.Index.UN_TOKENIZED)); ! ! itemHasRelations = true; } ! } */ ! /** *************** *** 750,757 **** } /** * Gets the annotations for this record, null or zero length if none available. * ! * @return The myAnnoResultDocs value */ protected ResultDoc[] getMyAnnoResultDocs() throws Exception { --- 915,924 ---- } + /** * Gets the annotations for this record, null or zero length if none available. * ! * @return The myAnnoResultDocs value ! * @exception Exception NOT YET DOCUMENTED */ protected ResultDoc[] getMyAnnoResultDocs() throws Exception { *************** *** 766,769 **** --- 933,937 ---- } + /** * Gets the XMLIndexerFieldsConfig to use for XML indexing, or null if none available. *************** *** 957,961 **** if (myCollectionDocReader == null) { RecordDataService recordDataService = getRecordDataService(); ! // RecordDataService is not available in OAI app: if (recordDataService == null) { --- 1125,1129 ---- if (myCollectionDocReader == null) { RecordDataService recordDataService = getRecordDataService(); ! // RecordDataService is not available in OAI app: if (recordDataService == null) { |