|
From: <rv...@us...> - 2012-02-03 23:47:32
|
Revision: 1051
http://treebase.svn.sourceforge.net/treebase/?rev=1051&view=rev
Author: rvos
Date: 2012-02-03 23:47:25 +0000 (Fri, 03 Feb 2012)
Log Message:
-----------
*** This commit is supposed to make matrix generation more efficient.
Modified Paths:
--------------
trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java
Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java
===================================================================
--- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-03 23:10:39 UTC (rev 1050)
+++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-03 23:47:25 UTC (rev 1051)
@@ -26,6 +26,7 @@
import org.cipres.treebase.domain.matrix.StandardMatrix;
import org.cipres.treebase.domain.matrix.StateSet;
import org.cipres.treebase.domain.study.Study;
+import org.cipres.treebase.domain.taxon.SpecimenLabel;
import org.cipres.treebase.domain.taxon.TaxonLabelHome;
import org.nexml.model.Annotatable;
import org.nexml.model.CategoricalMatrix;
@@ -51,22 +52,85 @@
*/
public NexmlMatrixWriter(Study study,TaxonLabelHome taxonLabelHome,Document document) {
super(study,taxonLabelHome,document);
+ }
+
+ /**
+ * This is the method that is called by the NexmlDocumentWriter when turning
+ * a study or data set into a NeXML document
+ * @param tbMatrix
+ * @param xmlOTUs
+ * @return
+ * XXX doesn't handle the following data types:
+ * public static final String MATRIX_DATATYPE_NUCLEOTIDE = "Nucleotide";
+ * public static final String MATRIX_DATATYPE_DISTANCE = "Distance";
+ * public static final String MATRIX_DATATYPE_MIXED = "Mixed";
+ */
+ public org.nexml.model.Matrix<?> fromTreeBaseToXml(CharacterMatrix tbMatrix,OTUs xmlOTUs) {
+
+ // here we decide what subtype of character matrix to instantiate
+ org.nexml.model.Matrix<?> xmlMatrix = createMatrix(tbMatrix, xmlOTUs);
+
+ // here we create column/character sets
+ createCharacterSets(tbMatrix, xmlMatrix);
+
+ return xmlMatrix;
+ }
+
+
+
+ /**
+ *
+ * @param tbMatrix
+ * @param xmlOTUs
+ * @return
+ */
+ private org.nexml.model.Matrix<?> createMatrix(CharacterMatrix tbMatrix, OTUs xmlOTUs) {
+
+ // here we decide what (super-)type to instantiate: discrete or continuous
+ if ( tbMatrix instanceof DiscreteMatrix ) {
+ org.nexml.model.Matrix<CharacterState> xmlDiscreteMatrix = null;
+
+ // 'standard' data is treated separately because we don't have an alphabet for it
+ if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) {
+
+ // standard categorical
+ xmlDiscreteMatrix = createStandardNexmlMatrix((StandardMatrix) tbMatrix,xmlOTUs);
+ }
+ else {
+
+ // molecular
+ xmlDiscreteMatrix = createMolecularNexmlMatrix((DiscreteMatrix) tbMatrix,xmlOTUs);
+ }
+ populateDiscreteNexmlMatrix(xmlDiscreteMatrix,(DiscreteMatrix)tbMatrix);
+ return xmlDiscreteMatrix;
+ }
+ else if ( tbMatrix instanceof ContinuousMatrix ) {
+
+ // continuous
+ org.nexml.model.ContinuousMatrix xmlContinuousMatrix = createContinuousNexmlMatrix((ContinuousMatrix) tbMatrix,xmlOTUs);
+ populateContinuousNexmlMatrix(xmlContinuousMatrix,(ContinuousMatrix)tbMatrix);
+ return xmlContinuousMatrix;
+ }
+ return null;
}
/**
* Creates and populates characters (i.e. columns) with their annotations,
- * and state sets, with their annotations
+ * and state sets, with their annotations. For standard data (including
+ * those matrices that are actually mostly molecular) we flatten the
+ * (fictional, but modeled) stateset mapping of all state symbols, plus
+ * missing ('?') and gap ('-').
*
* @param tbMatrix
* @return an xml matrix with empty rows
*/
- private CategoricalMatrix fromTreeBaseToXml(StandardMatrix tbMatrix,OTUs xmlOTUs) {
+ private CategoricalMatrix createStandardNexmlMatrix(StandardMatrix tbMatrix,OTUs xmlOTUs) {
if ( null == xmlOTUs ) {
xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId());
}
CategoricalMatrix xmlMatrix = getDocument().createCategoricalMatrix(xmlOTUs);
- setMatrixAttributes(xmlMatrix,tbMatrix);
+ copyMatrixAttributes(tbMatrix,xmlMatrix);
// first flatten the two-dimensional list into a map, we will always only create a single state set
List<List<DiscreteCharState>> tbStateLabels = tbMatrix.getStateLabels();
@@ -94,6 +158,7 @@
}
}
+ // the missing symbol ("?") includes all others, including gap ("-")
UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>());
gap.setLabel("-");
xmlMissingStates.add(gap);
@@ -105,32 +170,13 @@
for ( int i = 0; i < tbColumns.size(); i++ ) {
MatrixColumn tbColumn = tbColumns.get(i);
org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet);
- setCharacterAttributes(tbColumn, xmlCharacter);
+ copyCharacterAttributes(tbColumn, xmlCharacter);
}
return xmlMatrix;
}
-
- private void setCharacterAttributes(MatrixColumn tbColumn,org.nexml.model.Character xmlCharacter) {
- PhyloChar tbCharacter = tbColumn.getCharacter();
- if ( null != tbCharacter.getDescription() ) {
- xmlCharacter.setLabel(tbCharacter.getLabel());
- }
- attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class);
- }
-
- private void setMatrixAttributes(org.nexml.model.Matrix<?> xmlMatrix,CharacterMatrix tbMatrix) {
- // attach matrix identifiers
- attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class);
- String tb1MatrixID = tbMatrix.getTB1MatrixID();
- if ( null != tb1MatrixID ) {
- ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID);
- }
-
- xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$");
- xmlMatrix.setBaseURI(mMatrixBaseURI);
- xmlMatrix.setLabel(tbMatrix.getLabel());
- }
+
+
/**
* Creates and populates characters (i.e. columns) with their annotations,
* and state sets, with their annotations
@@ -138,7 +184,7 @@
* @param tbMatrix
* @return an xml matrix with empty rows
*/
- private MolecularMatrix fromTreeBaseToXml(DiscreteMatrix tbMatrix,OTUs xmlOTUs) {
+ private MolecularMatrix createMolecularNexmlMatrix(DiscreteMatrix tbMatrix,OTUs xmlOTUs) {
if ( null == xmlOTUs ) {
xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId());
}
@@ -146,7 +192,7 @@
MolecularMatrix xmlMatrix = null;
CharacterStateSet xmlStateSet = null;
- // create the matrix and constant state set
+ // create the matrix and constant (IUPAC) state set
if ( tbDataType.equals(MatrixDataType.MATRIX_DATATYPE_DNA) ) {
xmlMatrix = getDocument().createMolecularMatrix(xmlOTUs, MolecularMatrix.DNA);
xmlStateSet = ((MolecularMatrix)xmlMatrix).getDNACharacterStateSet();
@@ -159,7 +205,7 @@
xmlMatrix = getDocument().createMolecularMatrix(xmlOTUs, MolecularMatrix.Protein);
xmlStateSet = ((MolecularMatrix)xmlMatrix).getProteinCharacterStateSet();
}
- setMatrixAttributes(xmlMatrix,tbMatrix);
+ copyMatrixAttributes(tbMatrix,xmlMatrix);
// lookup the equivalent state in tb and attach identifiers
for(StateSet tbStateSet : tbMatrix.getStateSets() ) {
@@ -176,7 +222,7 @@
// create columns and attach identifiers
for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) {
org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet);
- setCharacterAttributes(tbColumn, xmlCharacter);
+ copyCharacterAttributes(tbColumn, xmlCharacter);
}
return xmlMatrix;
}
@@ -188,16 +234,16 @@
* @param tbMatrix
* @return an xml matrix with empty rows
*/
- private org.nexml.model.ContinuousMatrix fromTreeBaseToXml(ContinuousMatrix tbMatrix,OTUs xmlOTUs) {
+ private org.nexml.model.ContinuousMatrix createContinuousNexmlMatrix(ContinuousMatrix tbMatrix,OTUs xmlOTUs) {
if ( null == xmlOTUs ) {
xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId());
}
org.nexml.model.ContinuousMatrix xmlMatrix = getDocument().createContinuousMatrix(xmlOTUs);
- setMatrixAttributes(xmlMatrix,tbMatrix);
+ copyMatrixAttributes(tbMatrix,xmlMatrix);
for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) {
org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter();
- setCharacterAttributes(tbColumn, xmlCharacter);
+ copyCharacterAttributes(tbColumn, xmlCharacter);
//coerce the tbMatrix into a character matrix to get its character sets
CharacterMatrix tbCharacterMatrix = (CharacterMatrix)tbMatrix;
@@ -225,35 +271,18 @@
nexSubset.addThing(nexCharacters.get(i));
}
}
- }
-
+ }
}
return xmlMatrix;
}
-
-// XXX doesn't handle the following data types:
-// public static final String MATRIX_DATATYPE_NUCLEOTIDE = "Nucleotide";
-// public static final String MATRIX_DATATYPE_DISTANCE = "Distance";
-// public static final String MATRIX_DATATYPE_MIXED = "Mixed";
- @SuppressWarnings("unchecked")
- public org.nexml.model.Matrix<?> fromTreeBaseToXml(CharacterMatrix tbMatrix,OTUs xmlOTUs) {
- org.nexml.model.Matrix<?> xmlMatrix = null;
- if ( tbMatrix instanceof DiscreteMatrix ) {
- if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) {
- xmlMatrix = fromTreeBaseToXml((StandardMatrix) tbMatrix,xmlOTUs);
- }
- else {
- xmlMatrix = fromTreeBaseToXml((DiscreteMatrix) tbMatrix,xmlOTUs);
- }
- populateXmlMatrix((org.nexml.model.Matrix<CharacterState>)xmlMatrix,(DiscreteMatrix)tbMatrix);
- }
- else if ( tbMatrix instanceof ContinuousMatrix ) {
- xmlMatrix = fromTreeBaseToXml((ContinuousMatrix) tbMatrix,xmlOTUs);
- populateXmlMatrix((org.nexml.model.ContinuousMatrix)xmlMatrix,(ContinuousMatrix)tbMatrix);
- }
-
+ /**
+ *
+ * @param tbMatrix
+ * @param xmlMatrix
+ */
+ private void createCharacterSets(CharacterMatrix tbMatrix, org.nexml.model.Matrix<?> xmlMatrix) {
// here we copy the character sets for all matrix types
Set<CharSet> tbCharSets = tbMatrix.getCharSets();
for ( CharSet tbCharSet : tbCharSets ) {
@@ -270,25 +299,24 @@
int tbInc = 1;
// need to do this to prevent nullpointerexceptions
- if ( null != tbColumnRange.getRepeatInterval()) {
- tbInc = tbColumnRange.getRepeatInterval();
+ Integer tbRepeatInterval = tbColumnRange.getRepeatInterval();
+ if ( null != tbRepeatInterval ) {
+ tbInc = tbRepeatInterval;
}
// create the equivalent nexml character set
- Subset nexSubset = xmlMatrix.createSubset(tbCharSet.getLabel());
+ Subset xmlSubset = xmlMatrix.createSubset(tbCharSet.getLabel());
// assign character objects to the subset. Here we get the full list
- List<org.nexml.model.Character> nexCharacters = xmlMatrix.getCharacters();
+ List<org.nexml.model.Character> xmlCharacters = xmlMatrix.getCharacters();
// now we iterate over the coordinates and assign the nexml characters to the set
for ( int i = tbStart; i <= tbStop; i += tbInc ) {
- nexSubset.addThing(nexCharacters.get(i));
+ xmlSubset.addThing(xmlCharacters.get(i));
}
}
}
-
- return xmlMatrix;
- }
+ }
/**
*
@@ -297,135 +325,17 @@
* @param xmlOTUs
* @param stateSet
*/
- private void populateXmlMatrix(
- org.nexml.model.Matrix<CharacterState> xmlMatrix,
- DiscreteMatrix tbMatrix) {
+ private void populateDiscreteNexmlMatrix(org.nexml.model.Matrix<CharacterState> xmlMatrix, DiscreteMatrix tbMatrix) {
+
OTUs xmlOTUs = xmlMatrix.getOTUs();
- List<org.nexml.model.Character> characterList = xmlMatrix.getCharacters();
+ List<org.nexml.model.Character> xmlCharacters = xmlMatrix.getCharacters();
+
+ // iterates over all matrix rows, i.e. ntax times
for ( MatrixRow tbRow : tbMatrix.getRowsReadOnly() ) {
Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly();
OTU xmlOTU = getOTUById(xmlOTUs, tbRow.getTaxonLabel().getId());
- int charIndex = 0;
- if ( characterList.size() <= MAX_GRANULAR_NCHAR && xmlOTUs.getAllOTUs().size() <= MAX_GRANULAR_NTAX ) {
- for ( MatrixColumn tbColumn : ((CharacterMatrix)tbMatrix).getColumns() ) {
- String seq = tbRow.getNormalizedSymbolString();
- xmlMatrix.setSeq(seq, xmlOTU);
- org.nexml.model.Character xmlCharacter = characterList.get(charIndex);
- MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter);
-
- attachTreeBaseID ((Annotatable) xmlCell, tbColumn , DiscreteMatrixElement.class);
-
- //The following is commented out as tbRow.getElements() does not work directly and crashes the loop.
- //The above for loop fixes this issue.
- /*
- for ( MatrixElement tbCell : tbRow.getElements() ) {
- org.nexml.model.Character xmlCharacter = characterList.get(charIndex);
- MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter);
- DiscreteCharState tbState = ((DiscreteMatrixElement)tbCell).getCharState();
- String tbSymbolString = ( null == tbState ) ? "?" : tbState.getSymbol().toString();
- CharacterState xmlState = xmlCharacter.getCharacterStateSet().lookupCharacterStateBySymbol(tbSymbolString);
- xmlCell.setValue(xmlState);
- attachTreeBaseID((Annotatable)xmlCell,tbCell,DiscreteMatrixElement.class);
- */
-
- for ( RowSegment tbSegment : tbSegments ) {
- if ( tbSegment.getStartIndex() <= charIndex && charIndex <= tbSegment.getEndIndex() ) {
- //declare variables for row-segment annotations
- String title = tbSegment.getTitle();
- String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym();
- String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode();
- String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber();
- String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession();
- String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession();
- String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString();
- String scientificName = tbSegment.getSpecimenTaxonLabelAsString();
- String collector = tbSegment.getSpecimenLabel().getCollector();
- Double latitude = tbSegment.getSpecimenLabel().getLatitude();
- Double longitude = tbSegment.getSpecimenLabel().getLongitude();
- Double elevation = tbSegment.getSpecimenLabel().getElevation();
- String country = tbSegment.getSpecimenLabel().getCountry();
- String state = tbSegment.getSpecimenLabel().getState();
- String locality = tbSegment.getSpecimenLabel().getLocality();
- String notes = tbSegment.getSpecimenLabel().getNotes();
-
- //if the value is not null, output the xmlOTU annotation.
- //DwC refers to the Darwin Core term vocabulary for the associated annotation
- if (null != title){
- //output name identifying the data set from which the record was derived
- ((Annotatable)xmlCell).addAnnotationValue("DwC:datasetName", Constants.DwCURI, title);
- }
- if ( null != institutionCode ) {
- //output name or acronym of institution that has custody of information referred to in record
- ((Annotatable)xmlCell).addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode);
- }
- if ( null != collectionCode ) {
- //output name or code that identifies collection or data set from which record was derived
- ((Annotatable)xmlCell).addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode);
- }
- if ( null != catalogNumber ){
- //output unique (usually) identifier for the record within data set or collection
- ((Annotatable)xmlCell).addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber);
- }
- if ( null != accessionNumber) {
- //output a list of genetic sequence information associated with occurrence
- ((Annotatable)xmlCell).addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber);
- }
- if ( null != otherAccessionNumber ) {
- //list of previous or alternate fully catalog numbers (i.e. Genbank) or human-used identifiers
- ((Annotatable)xmlCell).addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber);
- }
- if ( null != dateSampled ) {
- //output date sampled in ISO 8601 format
- ((Annotatable)xmlCell).addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled);
- }
- if ( null != scientificName ) {
- //output full scientific name
- ((Annotatable)xmlCell).addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName);
- }
- if ( null != collector ) {
- //output names of people associated with recording of original occurrence
- ((Annotatable)xmlCell).addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector);
- }
- if ( null != latitude ) {
- //output geographic latitude in decimal degrees using geodeticDatum spatial reference system
- ((Annotatable)xmlCell).addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude);
- }
- if ( null != longitude ) {
- //output geographic longitude in decimal degrees using geodeticDatum spatial reference system
- ((Annotatable)xmlCell).addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude);
- }
- if ( null != elevation ) {
- //there are two different Darwin Core terms for elevation depending on elevation value
- //outputs geographic elevation of sample
- if ( elevation >= 0) {
- //above local surface in meters
- ((Annotatable)xmlCell).addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation);
- }
- else {
- //below local surface in meters
- ((Annotatable)xmlCell).addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation);
- }
- }
- if ( null != country ) {
- //output country in which location occurs
- ((Annotatable)xmlCell).addAnnotationValue("DwC:country", Constants.DwCURI, country);
- }
- if ( null != state ) {
- //output name of next smaller administrative region than country (i.e. state, province, region)
- ((Annotatable)xmlCell).addAnnotationValue ("DwC:stateProvince", Constants.DwCURI, state);
- }
- if ( null != locality) {
- //output brief description of sample location
- ((Annotatable)xmlCell).addAnnotationValue("DwC:locality", Constants.DwCURI, locality);
- }
- if ( null != notes ) {
- //output any additional information about specimen
- ((Annotatable)xmlCell).addAnnotationValue("DwC:occurenceRemarks", Constants.DwCURI, notes);
- }
- }
- }
- charIndex++;
- }
+ if ( xmlCharacters.size() <= MAX_GRANULAR_NCHAR && xmlOTUs.getAllOTUs().size() <= MAX_GRANULAR_NTAX ) {
+ populateDiscreteVerboseNexmlMatrix(xmlMatrix,tbMatrix,xmlCharacters,tbRow,tbSegments,xmlOTU);
}
else {
String seq = tbRow.getNormalizedSymbolString();
@@ -439,102 +349,49 @@
}
}
xmlMatrix.setSeq(seq,xmlOTU);
+
+ // this often only happens once, when the row has only 1 segment
+ for ( RowSegment tbSegment : tbSegments ) {
+ copyDarwinCoreAnnotations(tbSegment, xmlOTU);
+ }
}
- for ( RowSegment tbSegment : tbSegments ) {
- //declare variables for row-segment annotations
- String title = tbSegment.getTitle();
- String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym();
- String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode();
- String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber();
- String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession();
- String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession();
- String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString();
- String scientificName = tbSegment.getSpecimenTaxonLabelAsString();
- String collector = tbSegment.getSpecimenLabel().getCollector();
- Double latitude = tbSegment.getSpecimenLabel().getLatitude();
- Double longitude = tbSegment.getSpecimenLabel().getLongitude();
- Double elevation = tbSegment.getSpecimenLabel().getElevation();
- String country = tbSegment.getSpecimenLabel().getCountry();
- String state = tbSegment.getSpecimenLabel().getState();
- String locality = tbSegment.getSpecimenLabel().getLocality();
- String notes = tbSegment.getSpecimenLabel().getNotes();
+ }
+ }
+
+ /**
+ *
+ * @param xmlMatrix
+ * @param tbMatrix
+ * @param xmlCharacterList
+ * @param tbRow
+ * @param tbSegments
+ * @param xmlOTU
+ */
+ private void populateDiscreteVerboseNexmlMatrix(
+ org.nexml.model.Matrix<CharacterState> xmlMatrix,
+ DiscreteMatrix tbMatrix,
+ List<org.nexml.model.Character> xmlCharacterList,MatrixRow tbRow,
+ Set<RowSegment> tbSegments, OTU xmlOTU) {
- //if the value is not null, output the xmlOTU annotation.
- //DwC refers to the Darwin Core term vocabulary for the associated annotation
- if (null != title){
- //output name identifying the data set from which the record was derived
- xmlOTU.addAnnotationValue("DwC:datasetName", Constants.DwCURI, title);
+ // iterates over all characters, i.e. nchar times
+ int charIndex = 0;
+ String seq = tbRow.getSymbolString();
+ for ( MatrixColumn tbColumn : ((CharacterMatrix)tbMatrix).getColumns() ) {
+
+ org.nexml.model.Character xmlCharacter = xmlCharacterList.get(charIndex);
+ MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter);
+ String value = "" + seq.charAt(charIndex);
+ CharacterState xmlState = xmlMatrix.parseSymbol(value);
+ xmlCell.setValue(xmlState);
+ attachTreeBaseID ((Annotatable) xmlCell, tbColumn , DiscreteMatrixElement.class);
+
+ for ( RowSegment tbSegment : tbSegments ) {
+ if ( tbSegment.getStartIndex() <= charIndex && charIndex <= tbSegment.getEndIndex() ) {
+ copyDarwinCoreAnnotations(tbSegment, (Annotatable)xmlCell);
}
- if ( null != institutionCode ) {
- //output name or acronym of institution that has custody of information referred to in record
- xmlOTU.addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode);
- }
- if ( null != collectionCode ) {
- //output name or code that identifies collection or data set from which record was derived
- xmlOTU.addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode);
- }
- if ( null != catalogNumber ){
- //output unique (usually) identifier for the record within data set or collection
- xmlOTU.addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber);
- }
- if ( null != accessionNumber) {
- //output a list of genetic sequence information associated with occurrence
- xmlOTU.addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber);
- }
- if ( null != otherAccessionNumber ) {
- //list of previous or alternate fully catalog numbers (i.e. Genbank) or human-used identifiers
- xmlOTU.addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber);
- }
- if ( null != dateSampled ) {
- //output date sampled in ISO 8601 format
- xmlOTU.addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled);
- }
- if ( null != scientificName ) {
- //output full scientific name
- xmlOTU.addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName);
- }
- if ( null != collector ) {
- //output names of people associated with recording of original occurrence
- xmlOTU.addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector);
- }
- if ( null != latitude ) {
- //output geographic latitude in decimal degrees using geodeticDatum spatial reference system
- xmlOTU.addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude);
- }
- if ( null != longitude ) {
- //output geographic longitude in decimal degrees using geodeticDatum spatial reference system
- xmlOTU.addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude);
- }
- if ( null != elevation ) {
- //there are two different Darwin Core terms for elevation depending on elevation value
- //outputs geographic elevation of sample
- if ( elevation >= 0) {
- //above local surface in meters
- xmlOTU.addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation);
- }
- else {
- //below local surface in meters
- xmlOTU.addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation);
- }
- }
- if ( null != country ) {
- //output country in which location occurs
- xmlOTU.addAnnotationValue("DwC:country", Constants.DwCURI, country);
- }
- if ( null != state ) {
- //output name of next smaller administrative region than country (i.e. state, province, region)
- xmlOTU.addAnnotationValue ("DwC:stateProvince", Constants.DwCURI, state);
- }
- if ( null != locality) {
- //output brief description of sample location
- xmlOTU.addAnnotationValue("DwC:locality", Constants.DwCURI, locality);
- }
- if ( null != notes ) {
- //output any additional information about specimen
- xmlOTU.addAnnotationValue("DwC:occurenceRemarks", Constants.DwCURI, notes);
- }
}
- }
+ charIndex++;
+ }
}
/**
@@ -542,7 +399,7 @@
* @param xmlMatrix
* @param tbMatrix
*/
- private void populateXmlMatrix(org.nexml.model.ContinuousMatrix xmlMatrix,
+ private void populateContinuousNexmlMatrix(org.nexml.model.ContinuousMatrix xmlMatrix,
ContinuousMatrix tbMatrix) {
List<org.nexml.model.Character> characterList = xmlMatrix.getCharacters();
OTUs xmlOTUs = xmlMatrix.getOTUs();
@@ -563,100 +420,100 @@
}
Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly();
for ( RowSegment tbSegment : tbSegments ) {
- //declare variables for row-segment annotations
- String title = tbSegment.getTitle();
- String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym();
- String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode();
- String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber();
- String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession();
- String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession();
- String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString();
- String scientificName = tbSegment.getSpecimenTaxonLabelAsString();
- String collector = tbSegment.getSpecimenLabel().getCollector();
- Double latitude = tbSegment.getSpecimenLabel().getLatitude();
- Double longitude = tbSegment.getSpecimenLabel().getLongitude();
- Double elevation = tbSegment.getSpecimenLabel().getElevation();
- String country = tbSegment.getSpecimenLabel().getCountry();
- String state = tbSegment.getSpecimenLabel().getState();
- String locality = tbSegment.getSpecimenLabel().getLocality();
- String notes = tbSegment.getSpecimenLabel().getNotes();
-
- //if the value is not null, output the xmlOTU annotation.
- //DwC refers to the Darwin Core term vocabulary for the associated annotation
- if (null != title){
- //output name identifying the data set from which the record was derived
- xmlOTU.addAnnotationValue("DwC:datasetName", Constants.DwCURI, title);
- }
- if ( null != institutionCode ) {
- //output name or acronym of institution that has custody of information referred to in record
- xmlOTU.addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode);
- }
- if ( null != collectionCode ) {
- //output name or code that identifies collection or data set from which record was derived
- xmlOTU.addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode);
- }
- if ( null != catalogNumber ){
- //output unique (usually) identifier for the record within data set or collection
- xmlOTU.addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber);
- }
- if ( null != accessionNumber) {
- //output a list of genetic sequence information associated with occurrence
- xmlOTU.addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber);
- }
- if ( null != otherAccessionNumber ) {
- //list of previous or alternate fully catalog numbers (i.e. Genbank) or human-used identifiers
- xmlOTU.addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber);
- }
- if ( null != dateSampled ) {
- //output date sampled in ISO 8601 format
- xmlOTU.addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled);
- }
- if ( null != scientificName ) {
- //output full scientific name
- xmlOTU.addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName);
- }
- if ( null != collector ) {
- //output names of people associated with recording of original occurrence
- xmlOTU.addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector);
- }
- if ( null != latitude ) {
- //output geographic latitude in decimal degrees using geodeticDatum spatial reference system
- xmlOTU.addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude);
- }
- if ( null != longitude ) {
- //output geographic longitude in decimal degrees using geodeticDatum spatial reference system
- xmlOTU.addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude);
- }
- if ( null != elevation ) {
- //there are two different Darwin Core terms for elevation depending on elevation value
- //outputs geographic elevation of sample
- if ( elevation >= 0) {
- //above local surface in meters
- xmlOTU.addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation);
- }
- else {
- //below local surface in meters
- xmlOTU.addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation);
- }
- }
- if ( null != country ) {
- //output country in which location occurs
- xmlOTU.addAnnotationValue("DwC:country", Constants.DwCURI, country);
- }
- if ( null != state ) {
- //output name of next smaller administrative region than country (i.e. state, province, region)
- xmlOTU.addAnnotationValue ("DwC:stateProvince", Constants.DwCURI, state);
- }
- if ( null != locality) {
- //output brief description of sample location
- xmlOTU.addAnnotationValue("DwC:locality", Constants.DwCURI, locality);
- }
- if ( null != notes ) {
- //output any additional information about specimen
- xmlOTU.addAnnotationValue("DwC:occurenceRemarks", Constants.DwCURI, notes);
- }
+ copyDarwinCoreAnnotations(tbSegment,xmlOTU);
}
}
+ }
+
+ /**
+ *
+ * @param tbSegment
+ * @param xmlAnnotatable
+ */
+ private void copyDarwinCoreAnnotations(RowSegment tbSegment, Annotatable xmlAnnotatable) {
+
+ SpecimenLabel tbSpec = tbSegment.getSpecimenLabel();
+ Map<String,String> predicateToObjectMap = new HashMap<String,String>();
+
+ predicateToObjectMap.put("DwC:institutionCode", tbSpec.getInstAcronym());
+ predicateToObjectMap.put("DwC:collectionCode", tbSpec.getCollectionCode());
+ predicateToObjectMap.put("DwC:catalogNumber", tbSpec.getCatalogNumber());
+ predicateToObjectMap.put("DwC:associatedSequences", tbSpec.getGenBankAccession());
+ predicateToObjectMap.put("DwC:otherCatalogNumbers", tbSpec.getOtherAccession());
+ predicateToObjectMap.put("DwC:eventDate", tbSpec.getSampleDateString());
+ predicateToObjectMap.put("DwC:scientificName", tbSegment.getSpecimenTaxonLabelAsString());
+ predicateToObjectMap.put("DwC:recordedBy", tbSpec.getCollector());
+ predicateToObjectMap.put("DwC:country", tbSpec.getCountry());
+ predicateToObjectMap.put("DwC:locality", tbSpec.getLocality());
+ predicateToObjectMap.put("DwC:stateProvince", tbSpec.getState());
+ predicateToObjectMap.put("DwC:datasetName", tbSegment.getTitle());
+ predicateToObjectMap.put("DwC:occurenceRemarks", tbSpec.getNotes());
+
+ for ( String predicate : predicateToObjectMap.keySet() ) {
+ String objectString = predicateToObjectMap.get(predicate);
+ if ( null != objectString ) {
+ xmlAnnotatable.addAnnotationValue(predicate, Constants.DwCURI, objectString);
+ }
+ }
+
+ //output geographic latitude in decimal degrees using geodeticDatum spatial reference system
+ Double latitude = tbSpec.getLatitude();
+ if ( null != latitude ) {
+ xmlAnnotatable.addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude);
+ }
+
+ //output geographic longitude in decimal degrees using geodeticDatum spatial reference system
+ Double longitude = tbSpec.getLongitude();
+ if ( null != longitude ) {
+ xmlAnnotatable.addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude);
+ }
+
+ //there are two different Darwin Core terms for elevation depending on elevation value
+ //outputs geographic elevation of sample
+ Double elevation = tbSpec.getElevation();
+ if ( null != elevation ) {
+ if ( elevation >= 0) {
+ //above local surface in meters
+ xmlAnnotatable.addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation);
+ }
+ else {
+ //below local surface in meters
+ xmlAnnotatable.addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation);
+ }
+ }
}
+
+ /**
+ *
+ * @param tbColumn
+ * @param xmlCharacter
+ */
+ private void copyCharacterAttributes(MatrixColumn tbColumn,org.nexml.model.Character xmlCharacter) {
+ PhyloChar tbCharacter = tbColumn.getCharacter();
+ if ( null != tbCharacter.getDescription() ) {
+ xmlCharacter.setLabel(tbCharacter.getLabel());
+ }
+ attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class);
+ }
+
+ /**
+ *
+ * @param tbMatrix
+ * @param xmlMatrix
+ */
+ private void copyMatrixAttributes(CharacterMatrix tbMatrix,org.nexml.model.Matrix<?> xmlMatrix) {
+ // attach matrix identifiers
+ attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class);
+ String tb1MatrixID = tbMatrix.getTB1MatrixID();
+ if ( null != tb1MatrixID ) {
+ ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID);
+ }
+
+ xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$");
+ xmlMatrix.setBaseURI(mMatrixBaseURI);
+ xmlMatrix.setLabel(tbMatrix.getLabel());
+ }
+
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|