From: John W. <jwe...@us...> - 2011-01-15 00:54:37
|
Update of /cvsroot/dlsciences/dlese-tools-project/src/org/dlese/dpc/xml In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv4368/src/org/dlese/dpc/xml Added Files: NSDLDCNormalizer.java Log Message: new functionality for NSDL DC normalization --- NEW FILE: NSDLDCNormalizer.java ---
/*
 *  License and Copyright:
 *
 *  The contents of this file are subject to the Educational Community License v1.0 (the "License"); you may
 *  not use this file except in compliance with the License. You should have received a copy of the License
 *  along with this software; if not, you may obtain a copy of the License at
 *  http://www.opensource.org/licenses/ecl1.php.
 *
 *  Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND,
 *  either express or implied. See the License for the specific language governing rights and limitations
 *  under the License.
 *
 *  Copyright 2002-2011 by Digital Learning Sciences, University Corporation for Atmospheric Research (UCAR).
 *  All rights reserved.
 */
package org.dlese.dpc.xml;

import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.*;
import java.io.*;
import org.dom4j.*;
import org.dlese.dpc.xml.Dom4jUtils;
import org.dlese.dpc.util.URLConnectionTimedOutException;
import java.net.URLEncoder;

/**
 *  Normalizes NSDL DC records to conform to standard vocabularies in specific data elements like audience,
 *  education level, and type. Uses groups files to define the data elements and vocabularies. Lazy-loads the
 *  groups files and caches them once loaded. Create a new NSDLDCNormalizer to release the cache and refresh
 *  the vocabularies.
 *
 * @author    John Weatherley
 */
public class NSDLDCNormalizer {
    /** When true, {@link #prtln(String)} writes trace output to standard out. Shared by all instances. */
    private static boolean debug = true;

    /** URL of the vocab selections file, as passed to the constructor. */
    private String nsdlDcVocabSelectionsUrlStr = null;

    /** Parsed vocab selections file; null until the first successful fetch in initAllData(). */
    private Document _vocabSelectionsDom = null;


    /**
     *  Constructs a NSDLDCNormalizer object. Create a new NSDLDCNormalizer to refresh the vocabularies.
     *  Nothing is fetched here; the files are loaded on the first call to
     *  {@link #normalizeNsdlDcRecord(Document)}.
     *
     * @param  nsdlDcVocabSelectionsUrl  URL of the vocab selections XML file. The file is read for
     *      <code>/groupsFiles/groupsFile</code> elements, each with <code>toXpath</code>, <code>url</code>
     *      and <code>fromXpaths/fromXpath</code> children.
     */
    public NSDLDCNormalizer(String nsdlDcVocabSelectionsUrl) {
        this.nsdlDcVocabSelectionsUrlStr = nsdlDcVocabSelectionsUrl;
    }


    /** The toXpath of every usable groupsFile entry, in file order (List of String). */
    private List _xpathsToModify = null;

    /** Maps toXpath (String) to the groups file Document fetched for it. */
    private Map _groupsFileDoms = null;

    /** Maps toXpath (String) to the List of fromXpath Strings configured for it. */
    private Map _fromXpathsLists = null;


    /**
     *  Normalizes the given NSDL DC record. Works on a clone, so the Document passed in is never modified.
     *  If the vocabulary data could not be loaded, the record is null, or any error occurs while
     *  processing, the original Document is returned as-is.
     *
     * @param  nsdlDcRecordDoc  An NSDL DC record Document
     * @return                  Normalized NSDL DC record Document
     */
    public Document normalizeNsdlDcRecord(Document nsdlDcRecordDoc) {
        Document newDoc = null;
        try {
            initAllData();

            if (_xpathsToModify == null || _xpathsToModify.size() == 0 || nsdlDcRecordDoc == null) {
                return nsdlDcRecordDoc;
            }

            newDoc = (Document) nsdlDcRecordDoc.clone();

            for (int i = 0; i < _xpathsToModify.size(); i++) {
                String xPathToModify = (String) _xpathsToModify.get(i);

                Document groupsDom = (Document) _groupsFileDoms.get(xPathToModify);
                List fromXpathsList = (List) _fromXpathsLists.get(xPathToModify);

                // Only process if we've got something to process. All three conditions must hold; the
                // previous "a && b || c" form dereferenced a null list or a null groups Document.
                // Checked before anything is detached so a missing vocabulary never strips data.
                if (groupsDom == null || fromXpathsList == null || fromXpathsList.isEmpty()) {
                    continue;
                }

                // First remove all the existing elements from the new Document:
                List existingElms = newDoc.selectNodes(xPathToModify);
                for (int ii = 0; ii < existingElms.size(); ii++) {
                    ((Node) existingElms.get(ii)).detach();
                }

                // For each xPath to pull from, grab its value and translate to the new value:
                for (int j = 0; j < fromXpathsList.size(); j++) {
                    String fromXpath = (String) fromXpathsList.get(j);
                    prtln("fromXpath: " + fromXpath);

                    // Values are read from the untouched original, not from the stripped copy
                    List existingDCElms = nsdlDcRecordDoc.selectNodes(fromXpath);
                    for (int k = 0; k < existingDCElms.size(); k++) {
                        String existingValue = ((Node) existingDCElms.get(k)).getText();
                        prtln("existingValue: " + existingValue);
                        if (existingValue == null) {
                            continue;
                        }

                        //prtln("groupsDom:\n" + groupsDom.asXML());
                        // Quote the value as an XPath literal so apostrophes in record text cannot
                        // break the expression
                        List outlineNodes = groupsDom.selectNodes(
                            "/opml/body//outline[@vocab=" + toXPathLiteral(existingValue) + "]");
                        prtln("numOutlineNodes: " + outlineNodes.size());

                        for (int m = 0; m < outlineNodes.size(); m++) {
                            Node outlineNode = (Node) outlineNodes.get(m);

                            // Go one and two levels up, for nested vocabs like EdLevel
                            List newValueOutlineNodes =
                                outlineNode.selectNodes(".[@type='group']|..[@type='group']|../..[@type='group']");
                            prtln("numVewValueOutlineNodes: " + newValueOutlineNodes.size());

                            for (int n = 0; n < newValueOutlineNodes.size(); n++) {
                                Node newValueOutlineNode = (Node) newValueOutlineNodes.get(n);
                                String newValue = newValueOutlineNode.valueOf("@vocab").trim();
                                String xsiType = newValueOutlineNode.valueOf("@attribution").trim();

                                // NOTE(review): the translated value is only logged here; nothing in
                                // this method adds it to newDoc. Confirm whether insertion is still
                                // to be implemented.
                                prtln("xPathToModify:" + xPathToModify + " existingValue:'" + existingValue
                                     + "' newValue:'" + newValue + "' xsiType:'" + xsiType + "'");
                            }
                        }
                    }
                }
            }

            prtln("exisingNsdlDC:\n" + nsdlDcRecordDoc.asXML());
            prtln("newNsdlDC:\n" + newDoc.asXML());
        } catch (Throwable t) {
            // Best effort: on any failure hand back the record exactly as it was given
            prtlnErr("Error normalizing NSDL DC record: " + t);
            t.printStackTrace();
            return nsdlDcRecordDoc;
        }
        return newDoc;
    }


    /**
     *  Renders a string as an XPath 1.0 string literal. XPath 1.0 has no escape character, so a value
     *  containing both quote kinds is spliced together with <code>concat()</code>.
     *
     * @param  value  The raw text, not null
     * @return        An XPath expression that evaluates to exactly that text
     */
    private static String toXPathLiteral(String value) {
        if (value.indexOf('\'') < 0) {
            return "'" + value + "'";
        }
        if (value.indexOf('"') < 0) {
            return "\"" + value + "\"";
        }

        // Split on each apostrophe; emit the pieces single-quoted and the apostrophes double-quoted
        StringBuffer literal = new StringBuffer("concat(");
        int start = 0;
        int pos;
        while ((pos = value.indexOf('\'', start)) >= 0) {
            literal.append('\'').append(value.substring(start, pos)).append("',\"'\",");
            start = pos + 1;
        }
        literal.append('\'').append(value.substring(start)).append("')");
        return literal.toString();
    }


    /**
     *  Loads the vocab selections file and every groups file it lists, populating the three lookup
     *  structures. Does nothing once the selections file has been fetched successfully; if that fetch
     *  fails, the next call tries again. A groups file that cannot be fetched is logged and skipped.
     */
    private void initAllData() {
        // Init only if not done so previously:
        if (_vocabSelectionsDom == null) {
            try {
                // NOTE(review): 5000 is presumably a timeout in milliseconds - confirm against Dom4jUtils
                _vocabSelectionsDom = Dom4jUtils.getXmlDocument(nsdlDcVocabSelectionsUrlStr, 5000);
            } catch (Throwable t) {
                prtlnErr("Error fetching vocab selections file from URL '" + nsdlDcVocabSelectionsUrlStr + "': " + t);
                return;
            }

            if (_vocabSelectionsDom == null) {
                return;
            }

            List groupsFileNodes = _vocabSelectionsDom.selectNodes("/groupsFiles/groupsFile");
            if (groupsFileNodes.size() == 0) {
                return;
            }

            _xpathsToModify = new ArrayList();
            _groupsFileDoms = new TreeMap();
            _fromXpathsLists = new TreeMap();

            for (int i = 0; i < groupsFileNodes.size(); i++) {
                Node groupsFileNode = (Node) groupsFileNodes.get(i);
                String toXpath = groupsFileNode.valueOf("toXpath").trim();
                String url = groupsFileNode.valueOf("url").trim();
                List fromXpaths = groupsFileNode.selectNodes("fromXpaths/fromXpath");

                // An entry is usable only when it names a target, a groups file and at least one source
                if (toXpath.length() > 0 && url.length() > 0 && fromXpaths.size() > 0) {
                    Document groupsDom = null;
                    try {
                        groupsDom = Dom4jUtils.localizeXml(Dom4jUtils.getXmlDocument(url, 5000));
                    } catch (Throwable t) {
                        prtlnErr("Error fetching groups file rom URL '" + url + "': " + t);
                        continue;
                    }

                    List fromXpathsStrings = new ArrayList();
                    for (int j = 0; j < fromXpaths.size(); j++) {
                        fromXpathsStrings.add(((Node) fromXpaths.get(j)).getText().trim());
                    }

                    _xpathsToModify.add(toXpath);
                    _groupsFileDoms.put(toXpath, groupsDom);
                    _fromXpathsLists.put(toXpath, fromXpathsStrings);
                }
            }
        }
    }



    /* ---------------------- Debug methods ----------------------- */
    /**
     *  Return a string for the current time and date, suitable for display in log files and output to
     *  standard out:
     *
     * @return    The dateStamp value
     */
    public static String getDateStamp() {
        return new SimpleDateFormat("MMM d, yyyy h:mm:ss a zzz").format(new Date());
    }


    /**
     *  Output a line of text to error out, with datestamp.
     *
     * @param  s  The text that will be output to error out.
     */
    private final static void prtlnErr(String s) {
        System.err.println(getDateStamp() + " NSDLDCNormalizer Error: " + s);
    }


    /**
     *  Output a line of text to standard out, with datestamp, if debug is set to true.
     *
     * @param  s  The String that will be output.
     */
    private final static void prtln(String s) {
        if (debug) {
            System.out.println(getDateStamp() + " NSDLDCNormalizer: " + s);
        }
    }


    /**
     *  Sets the debug attribute of the NSDLDCNormalizer object. The flag is a static field, so the
     *  setting applies to every NSDLDCNormalizer instance, not just this one.
     *
     * @param  db  The new debug value
     */
    public final void setDebug(boolean db) {
        debug = db;
    }
}
|