From: John W. <jwe...@us...> - 2011-01-12 20:33:31
|
Update of /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/repository In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv8789/src/org/dlese/dpc/repository Modified Files: OAISetsMatchingFieldsBean.java RepositoryManager.java Log Message: -implemented ability to define OAI sets using Stored Lucene field/values (much more efficient than using queries) Index: RepositoryManager.java =================================================================== RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/repository/RepositoryManager.java,v retrieving revision 1.201 retrieving revision 1.202 diff -C2 -d -r1.201 -r1.202 *** RepositoryManager.java 12 Jan 2011 05:44:53 -0000 1.201 --- RepositoryManager.java 12 Jan 2011 20:33:23 -0000 1.202 *************** *** 5123,5127 **** /** * Define OAI sets for each collection in the repository. Replaces existing set definitions if they exist ! * but does not remove set definitions if the collection is no longer there. See {@link * #removeOAISetSpecDefinitions}. * --- 5123,5127 ---- /** * Define OAI sets for each collection in the repository. Replaces existing set definitions if they exist ! * but does not remove set definitions if the collection no longer exists. See {@link * #removeOAISetSpecDefinitions}. * *************** *** 5165,5175 **** setDefinitionsForm.setSetDescription(setInfo.getDescription()); ! // Define the Set: setDefinitionsForm.setMatchingFieldValues(new String[]{"collection:" + setInfo.getSetSpec()}); ! //setDefinitionsForm.setIncludedQuery("collection:\"" + setInfo.getSetSpec() + "\""); ! putOAISetSpecDefinition(setDefinitionsForm); return true; } --- 5165,5182 ---- setDefinitionsForm.setSetDescription(setInfo.getDescription()); ! // Define the Set using field/value definition: setDefinitionsForm.setMatchingFieldValues(new String[]{"collection:" + setInfo.getSetSpec()}); ! ! // Test mode toggles between creating the sets as field/value sets or virtual query sets: ! 
/* if(++defineOaiSetCount%2 == 0) ! setDefinitionsForm.setMatchingFieldValues(new String[]{"collection:" + setInfo.getSetSpec()}); ! else ! setDefinitionsForm.setIncludedQuery("collection:\"" + setInfo.getSetSpec() + "\""); */ ! putOAISetSpecDefinition(setDefinitionsForm); return true; } + + private int defineOaiSetCount = 0; *************** *** 5229,5233 **** if (setQ == null) setQ = virtualSearchFieldMapper.getQuery("setSpec", setSpec); ! prtln("getOaiSetQuery() query: " + (setQ == null ? "null" : setQ.toString())); return setQ; } --- 5236,5240 ---- if (setQ == null) setQ = virtualSearchFieldMapper.getQuery("setSpec", setSpec); ! //prtln("getOaiSetQuery() query: " + (setQ == null ? "null" : setQ.toString())); return setQ; } Index: OAISetsMatchingFieldsBean.java =================================================================== RCS file: /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/repository/OAISetsMatchingFieldsBean.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** OAISetsMatchingFieldsBean.java 12 Jan 2011 05:44:53 -0000 1.2 --- OAISetsMatchingFieldsBean.java 12 Jan 2011 20:33:23 -0000 1.3 *************** *** 21,24 **** --- 21,25 ---- import org.dlese.dpc.xml.Dom4jNodeListComparator; import org.dlese.dpc.xml.Dom4jUtils; + import org.dlese.dpc.index.reader.DocReader; import org.dom4j.Document; *************** *** 47,50 **** --- 48,53 ---- * <p> * + * Note that a new instance should be created each time the underlying file changes.<p> + * * See <a href="../../../../javadoc-includes/ListSets-config-sample.xml">sample ListSets XML config file</a> * . *************** *** 53,138 **** */ public class OAISetsMatchingFieldsBean { ! private static boolean debug = true; ! private Document listSetsDom = null; private List matchingFieldsSetsList = null; ! OAISetsMatchingFieldsBean( String listSetsXML ) { try { listSetsDom = Dom4jUtils.getXmlDocument(listSetsXML); ! 
matchingFieldsSetsList = listSetsDom.selectNodes("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0]"); ! prtln("matchingFieldsSetsList.length = " + matchingFieldsSetsList.size()); ! } catch( Throwable t ) { prtlnErr("Unable to initialize OAISetsMatchingFieldsBean: " + t); } } ! ! OAISetsMatchingFieldsBean() {} ! public int getNumSets() { ! if(matchingFieldsSetsList == null) return 0; return matchingFieldsSetsList.size(); } ! public boolean hasSetDefined(String setSpec) { ! if(listSetsDom == null) return false; ! return listSetsDom.matches("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0][setSpec='" + setSpec + "']"); } ! public List getOaiSetSpecs() { List setSpecs = new ArrayList(); ! if( matchingFieldsSetsList != null ) { ! for( int i = 0; i < matchingFieldsSetsList.size(); i++) { ! String setSpec = ((Node)matchingFieldsSetsList.get(i)).valueOf("@setSpec"); setSpecs.add(setSpec); } } return setSpecs; } ! ! private String [] fields = null; ! private Map fieldValueMaps = null; ! public List getOaiSetSpecs(org.apache.lucene.document.Document luceneDoc) { ! List setList = null; ! ! // Generate the fieldsList and Maps, if necessary: ! if(fields == null) { ! List fieldsList = new ArrayList(); ! ! if(matchingFieldsSetsList != null) { ! for(int i =0; i < matchingFieldsSetsList.size(); i++) { ! ! ! } } } ! // Gather the list of sets for the Document: ! setList = new ArrayList(); ! return setList; } ! public Query getQueryForSet(String setSpec) { try { ! if(listSetsDom == null) return null; Node matchingSetElm = listSetsDom.selectSingleNode("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0][setSpec='" + setSpec + "']"); ! if(matchingSetElm == null) return null; List matchingFieldValueNodes = matchingSetElm.selectNodes("matchingFieldValues/matchingFieldValue"); ! if(matchingFieldValueNodes == null || matchingFieldValueNodes.size() == 0) return null; ! 
BooleanQuery bq = new BooleanQuery(); ! for(int i = 0; i < matchingFieldValueNodes.size(); i++) { ! String field = ((Node)matchingFieldValueNodes.get(i)).valueOf("@field"); ! String term = ((Node)matchingFieldValueNodes.get(i)).valueOf("@value"); ! bq.add(new TermQuery(new Term(field,term)), BooleanClause.Occur.MUST); } //prtln("getQueryForSet() query: " + bq); --- 56,219 ---- */ public class OAISetsMatchingFieldsBean { ! private static boolean debug = false; ! private Document listSetsDom = null; private List matchingFieldsSetsList = null; + private Map fieldValuesMaps = null; ! ! /** ! * Constructs the Bean. Note that a new instance should be created each time the underlying file changes. ! * ! * @param listSetsXML The ListSets XML ! */ ! OAISetsMatchingFieldsBean(String listSetsXML) { try { listSetsDom = Dom4jUtils.getXmlDocument(listSetsXML); ! matchingFieldsSetsList = listSetsDom.selectNodes("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0]"); ! prtln("matchingFieldsSetsList.length = " + matchingFieldsSetsList.size()); ! } catch (Throwable t) { prtlnErr("Unable to initialize OAISetsMatchingFieldsBean: " + t); } } ! ! ! OAISetsMatchingFieldsBean() { } ! ! ! /** ! * Gets the numSets attribute of the OAISetsMatchingFieldsBean object ! * ! * @return The numSets value ! */ public int getNumSets() { ! if (matchingFieldsSetsList == null) return 0; return matchingFieldsSetsList.size(); } ! ! ! /** ! * Determine if the given setSpec is defined. ! * ! * @param setSpec setSpec ! * @return True if defined ! */ public boolean hasSetDefined(String setSpec) { ! if (listSetsDom == null) return false; ! return listSetsDom.matches("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0][setSpec='" + setSpec + "']"); } ! ! ! /** ! * Gets the oaiSetSpecs attribute of the OAISetsMatchingFieldsBean object ! * ! * @return The oaiSetSpecs value ! */ public List getOaiSetSpecs() { List setSpecs = new ArrayList(); ! 
if (matchingFieldsSetsList != null) { ! for (int i = 0; i < matchingFieldsSetsList.size(); i++) { ! String setSpec = ((Node) matchingFieldsSetsList.get(i)).valueOf("setSpec"); setSpecs.add(setSpec); } } + //prtln("getOaiSetSpecs(): " + setSpecs); return setSpecs; } ! ! ! /** ! * Gets the OAI SetSpecs associated with the given Lucene Document. ! * ! * @param luceneDoc Lucene Document ! * @return The oaiSetSpecs value ! */ public List getOaiSetSpecs(org.apache.lucene.document.Document luceneDoc) { ! // Generate the fieldValuesMaps, if necessary (one time then cache): ! if (fieldValuesMaps == null) { ! fieldValuesMaps = new TreeMap(); ! if (matchingFieldsSetsList != null) { ! for (int i = 0; i < matchingFieldsSetsList.size(); i++) { ! Node setNode = (Node) matchingFieldsSetsList.get(i); ! String setSpec = ((Node) matchingFieldsSetsList.get(i)).valueOf("setSpec"); ! List matchingFieldValueNodes = setNode.selectNodes("matchingFieldValues/matchingFieldValue"); ! for (int j = 0; j < matchingFieldValueNodes.size(); j++) { ! Node matchingFieldValueNode = (Node) matchingFieldValueNodes.get(j); ! String field = matchingFieldValueNode.valueOf("@field"); ! String value = matchingFieldValueNode.valueOf("@value"); ! // Grab the field values map for this field: ! Map fieldValuesMap = (Map) fieldValuesMaps.get(field); ! if (fieldValuesMap == null) ! fieldValuesMap = new TreeMap(); ! // Grab the value sets list for this field/value: ! List valueSetsList = (List) fieldValuesMap.get(value); ! if (valueSetsList == null) ! valueSetsList = new ArrayList(); ! if (!valueSetsList.contains(setSpec)) ! valueSetsList.add(setSpec); ! // Insert new values into the Maps: ! fieldValuesMap.put(value, valueSetsList); ! fieldValuesMaps.put(field, fieldValuesMap); ! } ! } } + //prtln("fieldValuesMaps is: " + fieldValuesMaps); } ! // Gather the list of sets for the Document: ! List setList = new ArrayList(); ! Iterator fields = fieldValuesMaps.keySet().iterator(); ! while (fields.hasNext()) { ! 
String field = (String) fields.next(); ! ! String[] myValues = luceneDoc.getValues(field); ! //prtln("field:" + field + " myValues: " + Arrays.toString(myValues)); ! ! if (myValues != null) { ! for (int jj = 0; jj < myValues.length; jj++) { ! String myValue = myValues[jj]; ! Map fieldValuesMap = (Map) fieldValuesMaps.get(field); ! List valueSetsList = (List) fieldValuesMap.get(myValue); ! if (valueSetsList != null) { ! for (int i = 0; i < valueSetsList.size(); i++) { ! Object setSpec = valueSetsList.get(i); ! if (!setList.contains(setSpec)) ! setList.add(setSpec); ! } ! } ! } ! } ! } ! //prtln("getOaiSetSpecs(): " + setList); return setList; } ! ! ! /** ! * Gets the Lucene Query for the given setSpec. ! * ! * @param setSpec The setSpec ! * @return The queryForSet value ! */ public Query getQueryForSet(String setSpec) { try { ! if (listSetsDom == null) return null; Node matchingSetElm = listSetsDom.selectSingleNode("/ListSets/set[string-length(matchingFieldValues/matchingFieldValue/@field) > 0][setSpec='" + setSpec + "']"); ! if (matchingSetElm == null) return null; List matchingFieldValueNodes = matchingSetElm.selectNodes("matchingFieldValues/matchingFieldValue"); ! if (matchingFieldValueNodes == null || matchingFieldValueNodes.size() == 0) return null; ! BooleanQuery bq = new BooleanQuery(); ! for (int i = 0; i < matchingFieldValueNodes.size(); i++) { ! String field = ((Node) matchingFieldValueNodes.get(i)).valueOf("@field"); ! String term = ((Node) matchingFieldValueNodes.get(i)).valueOf("@value"); ! bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.MUST); } //prtln("getQueryForSet() query: " + bq); *************** *** 143,148 **** } } ! ! //================================================================ --- 224,229 ---- } } ! ! //================================================================ |