From: <rv...@us...> - 2011-10-13 18:40:34
|
Revision: 972 http://treebase.svn.sourceforge.net/treebase/?rev=972&view=rev Author: rvos Date: 2011-10-13 18:40:28 +0000 (Thu, 13 Oct 2011) Log Message: ----------- Factored out methods to apply default attributes to matrices and characters Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2011-10-13 16:26:47 UTC (rev 971) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2011-10-13 18:40:28 UTC (rev 972) @@ -59,15 +59,13 @@ */ private CategoricalMatrix fromTreeBaseToXml(StandardMatrix tbMatrix) { OTUs xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId()); - CategoricalMatrix xmlMatrix = getDocument().createCategoricalMatrix(xmlOTUs); + CategoricalMatrix xmlMatrix = getDocument().createCategoricalMatrix(xmlOTUs); + setMatrixAttributes(xmlMatrix,tbMatrix); - // attach base uri and history note - xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using NexmlMatrixConverter $Rev$"); - xmlMatrix.setBaseURI(mMatrixBaseURI); - List<List<DiscreteCharState>> tbStateLabels = tbMatrix.getStateLabels(); List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); for ( int i = 0; i < tbColumns.size(); i++ ) { + MatrixColumn tbColumn = tbColumns.get(i); CharacterStateSet xmlStateSet = xmlMatrix.createCharacterStateSet(); for ( DiscreteCharState tbState : tbStateLabels.get(i) ) { CharacterState xmlState = xmlStateSet.createCharacterState(tbState.getSymbol().toString()); @@ -77,14 +75,31 @@ attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); } org.nexml.model.Character xmlCharacter = 
xmlMatrix.createCharacter(xmlStateSet); - PhyloChar tbCharacter = tbColumns.get(i).getCharacter(); - if ( null != tbCharacter.getDescription() ) { - xmlCharacter.setLabel(tbCharacter.getDescription()); - } - attachTreeBaseID((Annotatable)xmlCharacter,tbColumns.get(i),MatrixColumn.class); + setCharacterAttributes(tbColumn, xmlCharacter); } return xmlMatrix; } + + private void setCharacterAttributes(MatrixColumn tbColumn,org.nexml.model.Character xmlCharacter) { + PhyloChar tbCharacter = tbColumn.getCharacter(); + if ( null != tbCharacter.getDescription() ) { + xmlCharacter.setLabel(tbCharacter.getLabel()); + } + attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class); + } + + private void setMatrixAttributes(org.nexml.model.Matrix<?> xmlMatrix,CharacterMatrix tbMatrix) { + xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$"); + xmlMatrix.setBaseURI(mMatrixBaseURI); + xmlMatrix.setLabel(tbMatrix.getLabel()); + + // attach matrix identifiers + attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class); + String tb1MatrixID = tbMatrix.getTB1MatrixID(); + if ( null != tb1MatrixID ) { + ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID); + } + } /** * Creates and populates characters (i.e. 
columns) with their annotations, @@ -112,11 +127,8 @@ xmlMatrix = getDocument().createMolecularMatrix(xmlOTUs, MolecularMatrix.Protein); xmlStateSet = ((MolecularMatrix)xmlMatrix).getProteinCharacterStateSet(); } + setMatrixAttributes(xmlMatrix,tbMatrix); - // attach base uri and history note - xmlMatrix.setBaseURI(mMatrixBaseURI); - xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using NexmlMatrixConverter $Rev$"); - // lookup the equivalent state in tb and attach identifiers for(StateSet tbStateSet : tbMatrix.getStateSets() ) { for (DiscreteCharState tbState : tbStateSet.getStates() ) { @@ -132,7 +144,7 @@ // create columns and attach identifiers for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) { org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet); - attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class); + setCharacterAttributes(tbColumn, xmlCharacter); } return xmlMatrix; } @@ -147,19 +159,11 @@ private org.nexml.model.ContinuousMatrix fromTreeBaseToXml(ContinuousMatrix tbMatrix) { OTUs xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId()); org.nexml.model.ContinuousMatrix xmlMatrix = getDocument().createContinuousMatrix(xmlOTUs); + setMatrixAttributes(xmlMatrix,tbMatrix); - // attach base uri and history note - xmlMatrix.setBaseURI(mMatrixBaseURI); - xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using NexmlMatrixConverter $Rev$"); - for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) { org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(); - PhyloChar tbCharacter = tbColumn.getCharacter(); - if ( null != tbCharacter.getDescription() ) { - xmlCharacter.setLabel(tbCharacter.getDescription()); - ((Annotatable)xmlCharacter).addAnnotationValue("dcterms:description", Constants.DCTermsURI, tbCharacter.getDescription()); - } - 
attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class); + setCharacterAttributes(tbColumn, xmlCharacter); //coerce the tbMatrix into a character matrix to get its character sets CharacterMatrix tbCharacterMatrix = (CharacterMatrix)tbMatrix; @@ -215,12 +219,8 @@ xmlMatrix = fromTreeBaseToXml((ContinuousMatrix) tbMatrix); populateXmlMatrix((org.nexml.model.ContinuousMatrix)xmlMatrix,(ContinuousMatrix)tbMatrix); } - xmlMatrix.setLabel(tbMatrix.getTitle()); - attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class); - String tb1MatrixID = tbMatrix.getTB1MatrixID(); - if ( null != tb1MatrixID ) { - ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID); - } + + // here we copy the character sets for all matrix types Set<CharSet> tbCharSets = tbMatrix.getCharSets(); for ( CharSet tbCharSet : tbCharSets ) { Collection<ColumnRange> tbColumnRanges = tbCharSet.getColumns(tbMatrix); @@ -234,6 +234,12 @@ // increment from beginning to end. This number is probably either null, for a // contiguous range, or perhaps 3 for codon positions int tbInc = 1; + + // need to do this to prevent nullpointerexceptions + if ( null != tbColumnRange.getRepeatInterval()) { + tbInc = tbColumnRange.getRepeatInterval(); + } + // create the equivalent nexml character set Subset nexSubset = xmlMatrix.createSubset(tbCharSet.getLabel()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-01-31 19:53:18
|
Revision: 1022 http://treebase.svn.sourceforge.net/treebase/?rev=1022&view=rev Author: rvos Date: 2012-01-31 19:53:12 +0000 (Tue, 31 Jan 2012) Log Message: ----------- This should make a single state set for all symbols in a standard matrix. This is more concise so it should fix recent harvesting issues. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-26 16:56:16 UTC (rev 1021) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 19:53:12 UTC (rev 1022) @@ -1,7 +1,10 @@ package org.cipres.treebase.domain.nexus.nexml; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.cipres.treebase.Constants; @@ -34,6 +37,7 @@ import org.nexml.model.OTUs; import org.nexml.model.OTU; import org.nexml.model.Subset; +import org.nexml.model.UncertainCharacterState; public class NexmlMatrixWriter extends NexmlObjectConverter { @@ -64,18 +68,43 @@ CategoricalMatrix xmlMatrix = getDocument().createCategoricalMatrix(xmlOTUs); setMatrixAttributes(xmlMatrix,tbMatrix); + // first flatten the two-dimensional list into a map, we will always only create a single state set List<List<DiscreteCharState>> tbStateLabels = tbMatrix.getStateLabels(); + Map<Character,DiscreteCharState> stateForSymbol = new HashMap<Character,DiscreteCharState>(); + CharacterStateSet xmlStateSet = xmlMatrix.createCharacterStateSet(); + for ( int i = 0; i < tbStateLabels.size(); i++ ) { + for ( int j = 0; j < tbStateLabels.get(i).size(); j++ ) { + Character symbol = 
tbStateLabels.get(i).get(j).getSymbol(); + DiscreteCharState state = tbStateLabels.get(i).get(j); + stateForSymbol.put(symbol, state); + } + } + UncertainCharacterState missing = xmlStateSet.createUncertainCharacterState("?", new HashSet<CharacterState>()); + UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); + missing.getStates().add(gap); + + // then create the single state set out of the map, assigning all non-gap characters to missing + for ( Character symbol : stateForSymbol.keySet() ) { + CharacterState xmlState = null; + if ( symbol.charValue() == '?' ) { + xmlState = missing; + } + else if ( symbol.charValue() == '-' ) { + xmlState = gap; + } + else { + xmlState = xmlStateSet.createCharacterState(symbol.toString()); + missing.getStates().add(xmlState); + } + DiscreteCharState tbState = stateForSymbol.get(symbol); + xmlState.setLabel(tbState.getLabel()); + attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); + } + + // then create the XML characters, assigning them all the same state set List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); for ( int i = 0; i < tbColumns.size(); i++ ) { - MatrixColumn tbColumn = tbColumns.get(i); - CharacterStateSet xmlStateSet = xmlMatrix.createCharacterStateSet(); - for ( DiscreteCharState tbState : tbStateLabels.get(i) ) { - CharacterState xmlState = xmlStateSet.createCharacterState(tbState.getSymbol().toString()); - if ( null != tbState.getDescription() ) { - xmlState.setLabel(tbState.getDescription()); - } - attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); - } + MatrixColumn tbColumn = tbColumns.get(i); org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet); setCharacterAttributes(tbColumn, xmlCharacter); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-01-31 20:21:46
|
Revision: 1023 http://treebase.svn.sourceforge.net/treebase/?rev=1023&view=rev Author: rvos Date: 2012-01-31 20:21:36 +0000 (Tue, 31 Jan 2012) Log Message: ----------- Re-ordered state set generation so that "missing" is populated with all other states. Also, set the assignment of the XML id before any annotations are added, this so that the about attribute is updated correctly. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 19:53:12 UTC (rev 1022) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 20:21:36 UTC (rev 1023) @@ -79,27 +79,23 @@ stateForSymbol.put(symbol, state); } } - UncertainCharacterState missing = xmlStateSet.createUncertainCharacterState("?", new HashSet<CharacterState>()); - UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); - missing.getStates().add(gap); - // then create the single state set out of the map, assigning all non-gap characters to missing + // then create the single state set out of the map, assigning all non-missing characters to missing + Set<CharacterState> xmlMissingStates = new HashSet<CharacterState>(); for ( Character symbol : stateForSymbol.keySet() ) { - CharacterState xmlState = null; - if ( symbol.charValue() == '?' ) { - xmlState = missing; + if ( symbol.charValue() != '?' 
&& symbol.charValue() != '-' ) { + CharacterState xmlState = xmlStateSet.createCharacterState(symbol.toString()); + DiscreteCharState tbState = stateForSymbol.get(symbol); + xmlState.setLabel(tbState.getLabel()); + attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); + xmlMissingStates.add(xmlState); } - else if ( symbol.charValue() == '-' ) { - xmlState = gap; - } - else { - xmlState = xmlStateSet.createCharacterState(symbol.toString()); - missing.getStates().add(xmlState); - } - DiscreteCharState tbState = stateForSymbol.get(symbol); - xmlState.setLabel(tbState.getLabel()); - attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); - } + } + UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); + xmlMissingStates.add(gap); + UncertainCharacterState missing = xmlStateSet.createUncertainCharacterState("?", xmlMissingStates); + missing.setLabel("?"); + gap.setLabel("-"); // then create the XML characters, assigning them all the same state set List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); @@ -120,16 +116,16 @@ } private void setMatrixAttributes(org.nexml.model.Matrix<?> xmlMatrix,CharacterMatrix tbMatrix) { - xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$"); - xmlMatrix.setBaseURI(mMatrixBaseURI); - xmlMatrix.setLabel(tbMatrix.getLabel()); - // attach matrix identifiers attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class); String tb1MatrixID = tbMatrix.getTB1MatrixID(); if ( null != tb1MatrixID ) { ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID); } + + xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$"); + xmlMatrix.setBaseURI(mMatrixBaseURI); + xmlMatrix.setLabel(tbMatrix.getLabel()); } /** This was sent by the SourceForge.net 
collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-01-31 20:53:41
|
Revision: 1024 http://treebase.svn.sourceforge.net/treebase/?rev=1024&view=rev Author: rvos Date: 2012-01-31 20:53:32 +0000 (Tue, 31 Jan 2012) Log Message: ----------- This should result in the correct identifiers being set for each state, and all states other than missing being added to the set of possible states within the missing state set. This includes the gap character. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 20:21:36 UTC (rev 1023) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 20:53:32 UTC (rev 1024) @@ -82,20 +82,28 @@ // then create the single state set out of the map, assigning all non-missing characters to missing Set<CharacterState> xmlMissingStates = new HashSet<CharacterState>(); + UncertainCharacterState missing = null; for ( Character symbol : stateForSymbol.keySet() ) { - if ( symbol.charValue() != '?' 
&& symbol.charValue() != '-' ) { - CharacterState xmlState = xmlStateSet.createCharacterState(symbol.toString()); - DiscreteCharState tbState = stateForSymbol.get(symbol); - xmlState.setLabel(tbState.getLabel()); - attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); + char sym = symbol.charValue(); + CharacterState xmlState = null; + String symString = symbol.toString(); + if ( sym == '-' ) { + xmlState = xmlStateSet.createUncertainCharacterState(symString, new HashSet<CharacterState>()); xmlMissingStates.add(xmlState); } - } - UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); - xmlMissingStates.add(gap); - UncertainCharacterState missing = xmlStateSet.createUncertainCharacterState("?", xmlMissingStates); - missing.setLabel("?"); - gap.setLabel("-"); + else if ( sym == '?' ) { + xmlState = xmlStateSet.createUncertainCharacterState(symString, new HashSet<CharacterState>()); + missing = (UncertainCharacterState) xmlState; + } + else { + xmlState = xmlStateSet.createCharacterState(symString); + xmlMissingStates.add(xmlState); + } + DiscreteCharState tbState = stateForSymbol.get(symbol); + xmlState.setLabel(symString); + attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); + } + missing.setStates(xmlMissingStates); // then create the XML characters, assigning them all the same state set List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-01-31 21:10:54
|
Revision: 1025 http://treebase.svn.sourceforge.net/treebase/?rev=1025&view=rev Author: rvos Date: 2012-01-31 21:10:45 +0000 (Tue, 31 Jan 2012) Log Message: ----------- Added test to ensure a missing state has been seen in the matrix. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 20:53:32 UTC (rev 1024) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 21:10:45 UTC (rev 1025) @@ -103,7 +103,9 @@ xmlState.setLabel(symString); attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); } - missing.setStates(xmlMissingStates); + if ( null != missing ) { + missing.setStates(xmlMissingStates); + } // then create the XML characters, assigning them all the same state set List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-01-31 21:23:39
|
Revision: 1026 http://treebase.svn.sourceforge.net/treebase/?rev=1026&view=rev Author: rvos Date: 2012-01-31 21:23:29 +0000 (Tue, 31 Jan 2012) Log Message: ----------- Apparently, missing and gap state sets aren't considered states that a DiscreteMatrix would return in its set of sets. We will just have to hardcode them in. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 21:10:45 UTC (rev 1025) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-01-31 21:23:29 UTC (rev 1026) @@ -82,31 +82,24 @@ // then create the single state set out of the map, assigning all non-missing characters to missing Set<CharacterState> xmlMissingStates = new HashSet<CharacterState>(); - UncertainCharacterState missing = null; for ( Character symbol : stateForSymbol.keySet() ) { - char sym = symbol.charValue(); - CharacterState xmlState = null; - String symString = symbol.toString(); - if ( sym == '-' ) { - xmlState = xmlStateSet.createUncertainCharacterState(symString, new HashSet<CharacterState>()); + char sym = symbol.charValue(); + if ( sym != '-' && sym != '?' ) { + String symString = symbol.toString(); + CharacterState xmlState = xmlStateSet.createCharacterState(symString); xmlMissingStates.add(xmlState); - } - else if ( sym == '?' 
) { - xmlState = xmlStateSet.createUncertainCharacterState(symString, new HashSet<CharacterState>()); - missing = (UncertainCharacterState) xmlState; - } - else { - xmlState = xmlStateSet.createCharacterState(symString); - xmlMissingStates.add(xmlState); + DiscreteCharState tbState = stateForSymbol.get(symbol); + xmlState.setLabel(symString); + attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); } - DiscreteCharState tbState = stateForSymbol.get(symbol); - xmlState.setLabel(symString); - attachTreeBaseID((Annotatable)xmlState,tbState,DiscreteCharState.class); } - if ( null != missing ) { - missing.setStates(xmlMissingStates); - } + UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); + gap.setLabel("-"); + xmlMissingStates.add(gap); + UncertainCharacterState missing = xmlStateSet.createUncertainCharacterState("?", xmlMissingStates); + missing.setLabel("?"); + // then create the XML characters, assigning them all the same state set List<MatrixColumn> tbColumns = tbMatrix.getColumnsReadOnly(); for ( int i = 0; i < tbColumns.size(); i++ ) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-02-01 21:40:50
|
Revision: 1036 http://treebase.svn.sourceforge.net/treebase/?rev=1036&view=rev Author: rvos Date: 2012-02-01 21:40:44 +0000 (Wed, 01 Feb 2012) Log Message: ----------- Now generates character state sequences that only contain schema-valid IUPAC single character symbols like N, not constructs like {ACGT}, for indicating uncertainty. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-01 21:33:34 UTC (rev 1035) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-01 21:40:44 UTC (rev 1036) @@ -308,7 +308,7 @@ int charIndex = 0; if ( characterList.size() <= MAX_GRANULAR_NCHAR && xmlOTUs.getAllOTUs().size() <= MAX_GRANULAR_NTAX ) { for ( MatrixColumn tbColumn : ((CharacterMatrix)tbMatrix).getColumns() ) { - String seq = tbRow.buildElementAsString(); + String seq = tbRow.getNormalizedSymbolString(); xmlMatrix.setSeq(seq, xmlOTU); org.nexml.model.Character xmlCharacter = characterList.get(charIndex); MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter); @@ -428,7 +428,7 @@ } } else { - String seq = tbRow.buildElementAsString(); + String seq = tbRow.getNormalizedSymbolString(); if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) { StringBuilder sb = new StringBuilder(); for ( int i = 0; i < seq.length(); i++ ) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-02-03 23:47:32
|
Revision: 1051 http://treebase.svn.sourceforge.net/treebase/?rev=1051&view=rev Author: rvos Date: 2012-02-03 23:47:25 +0000 (Fri, 03 Feb 2012) Log Message: ----------- *** This commit is supposed to make matrix generation more efficient. Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-03 23:10:39 UTC (rev 1050) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-03 23:47:25 UTC (rev 1051) @@ -26,6 +26,7 @@ import org.cipres.treebase.domain.matrix.StandardMatrix; import org.cipres.treebase.domain.matrix.StateSet; import org.cipres.treebase.domain.study.Study; +import org.cipres.treebase.domain.taxon.SpecimenLabel; import org.cipres.treebase.domain.taxon.TaxonLabelHome; import org.nexml.model.Annotatable; import org.nexml.model.CategoricalMatrix; @@ -51,22 +52,85 @@ */ public NexmlMatrixWriter(Study study,TaxonLabelHome taxonLabelHome,Document document) { super(study,taxonLabelHome,document); + } + + /** + * This is the method that is called by the NexmlDocumentWriter when turning + * a study or data set into a NeXML document + * @param tbMatrix + * @param xmlOTUs + * @return + * XXX doesn't handle the following data types: + * public static final String MATRIX_DATATYPE_NUCLEOTIDE = "Nucleotide"; + * public static final String MATRIX_DATATYPE_DISTANCE = "Distance"; + * public static final String MATRIX_DATATYPE_MIXED = "Mixed"; + */ + public org.nexml.model.Matrix<?> fromTreeBaseToXml(CharacterMatrix tbMatrix,OTUs xmlOTUs) { + + // here we decide what subtype of character matrix to instantiate + org.nexml.model.Matrix<?> xmlMatrix = createMatrix(tbMatrix, 
xmlOTUs); + + // here we create column/character sets + createCharacterSets(tbMatrix, xmlMatrix); + + return xmlMatrix; + } + + + + /** + * + * @param tbMatrix + * @param xmlOTUs + * @return + */ + private org.nexml.model.Matrix<?> createMatrix(CharacterMatrix tbMatrix, OTUs xmlOTUs) { + + // here we decide what (super-)type to instantiate: discrete or continuous + if ( tbMatrix instanceof DiscreteMatrix ) { + org.nexml.model.Matrix<CharacterState> xmlDiscreteMatrix = null; + + // 'standard' data is treated separately because we don't have an alphabet for it + if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) { + + // standard categorical + xmlDiscreteMatrix = createStandardNexmlMatrix((StandardMatrix) tbMatrix,xmlOTUs); + } + else { + + // molecular + xmlDiscreteMatrix = createMolecularNexmlMatrix((DiscreteMatrix) tbMatrix,xmlOTUs); + } + populateDiscreteNexmlMatrix(xmlDiscreteMatrix,(DiscreteMatrix)tbMatrix); + return xmlDiscreteMatrix; + } + else if ( tbMatrix instanceof ContinuousMatrix ) { + + // continuous + org.nexml.model.ContinuousMatrix xmlContinuousMatrix = createContinuousNexmlMatrix((ContinuousMatrix) tbMatrix,xmlOTUs); + populateContinuousNexmlMatrix(xmlContinuousMatrix,(ContinuousMatrix)tbMatrix); + return xmlContinuousMatrix; + } + return null; } /** * Creates and populates characters (i.e. columns) with their annotations, - * and state sets, with their annotations + * and state sets, with their annotations. For standard data (including + * those matrices that are actually mostly molecular) we flatten the + * (fictional, but modeled) stateset mapping of all state symbols, plus + * missing ('?') and gap ('-'). 
* * @param tbMatrix * @return an xml matrix with empty rows */ - private CategoricalMatrix fromTreeBaseToXml(StandardMatrix tbMatrix,OTUs xmlOTUs) { + private CategoricalMatrix createStandardNexmlMatrix(StandardMatrix tbMatrix,OTUs xmlOTUs) { if ( null == xmlOTUs ) { xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId()); } CategoricalMatrix xmlMatrix = getDocument().createCategoricalMatrix(xmlOTUs); - setMatrixAttributes(xmlMatrix,tbMatrix); + copyMatrixAttributes(tbMatrix,xmlMatrix); // first flatten the two-dimensional list into a map, we will always only create a single state set List<List<DiscreteCharState>> tbStateLabels = tbMatrix.getStateLabels(); @@ -94,6 +158,7 @@ } } + // the missing symbol ("?") includes all others, including gap ("-") UncertainCharacterState gap = xmlStateSet.createUncertainCharacterState("-", new HashSet<CharacterState>()); gap.setLabel("-"); xmlMissingStates.add(gap); @@ -105,32 +170,13 @@ for ( int i = 0; i < tbColumns.size(); i++ ) { MatrixColumn tbColumn = tbColumns.get(i); org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet); - setCharacterAttributes(tbColumn, xmlCharacter); + copyCharacterAttributes(tbColumn, xmlCharacter); } return xmlMatrix; } - - private void setCharacterAttributes(MatrixColumn tbColumn,org.nexml.model.Character xmlCharacter) { - PhyloChar tbCharacter = tbColumn.getCharacter(); - if ( null != tbCharacter.getDescription() ) { - xmlCharacter.setLabel(tbCharacter.getLabel()); - } - attachTreeBaseID((Annotatable)xmlCharacter,tbColumn,MatrixColumn.class); - } - - private void setMatrixAttributes(org.nexml.model.Matrix<?> xmlMatrix,CharacterMatrix tbMatrix) { - // attach matrix identifiers - attachTreeBaseID((Annotatable)xmlMatrix, tbMatrix,Matrix.class); - String tb1MatrixID = tbMatrix.getTB1MatrixID(); - if ( null != tb1MatrixID ) { - ((Annotatable)xmlMatrix).addAnnotationValue("tb:identifier.matrix.tb1", Constants.TBTermsURI, tb1MatrixID); - } - - 
xmlMatrix.addAnnotationValue("skos:historyNote", Constants.SKOSURI, "Mapped from TreeBASE schema using "+this.toString()+" $Rev$"); - xmlMatrix.setBaseURI(mMatrixBaseURI); - xmlMatrix.setLabel(tbMatrix.getLabel()); - } + + /** * Creates and populates characters (i.e. columns) with their annotations, * and state sets, with their annotations @@ -138,7 +184,7 @@ * @param tbMatrix * @return an xml matrix with empty rows */ - private MolecularMatrix fromTreeBaseToXml(DiscreteMatrix tbMatrix,OTUs xmlOTUs) { + private MolecularMatrix createMolecularNexmlMatrix(DiscreteMatrix tbMatrix,OTUs xmlOTUs) { if ( null == xmlOTUs ) { xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId()); } @@ -146,7 +192,7 @@ MolecularMatrix xmlMatrix = null; CharacterStateSet xmlStateSet = null; - // create the matrix and constant state set + // create the matrix and constant (IUPAC) state set if ( tbDataType.equals(MatrixDataType.MATRIX_DATATYPE_DNA) ) { xmlMatrix = getDocument().createMolecularMatrix(xmlOTUs, MolecularMatrix.DNA); xmlStateSet = ((MolecularMatrix)xmlMatrix).getDNACharacterStateSet(); @@ -159,7 +205,7 @@ xmlMatrix = getDocument().createMolecularMatrix(xmlOTUs, MolecularMatrix.Protein); xmlStateSet = ((MolecularMatrix)xmlMatrix).getProteinCharacterStateSet(); } - setMatrixAttributes(xmlMatrix,tbMatrix); + copyMatrixAttributes(tbMatrix,xmlMatrix); // lookup the equivalent state in tb and attach identifiers for(StateSet tbStateSet : tbMatrix.getStateSets() ) { @@ -176,7 +222,7 @@ // create columns and attach identifiers for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) { org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(xmlStateSet); - setCharacterAttributes(tbColumn, xmlCharacter); + copyCharacterAttributes(tbColumn, xmlCharacter); } return xmlMatrix; } @@ -188,16 +234,16 @@ * @param tbMatrix * @return an xml matrix with empty rows */ - private org.nexml.model.ContinuousMatrix fromTreeBaseToXml(ContinuousMatrix tbMatrix,OTUs xmlOTUs) { + private 
org.nexml.model.ContinuousMatrix createContinuousNexmlMatrix(ContinuousMatrix tbMatrix,OTUs xmlOTUs) { if ( null == xmlOTUs ) { xmlOTUs = getOTUsById(tbMatrix.getTaxa().getId()); } org.nexml.model.ContinuousMatrix xmlMatrix = getDocument().createContinuousMatrix(xmlOTUs); - setMatrixAttributes(xmlMatrix,tbMatrix); + copyMatrixAttributes(tbMatrix,xmlMatrix); for ( MatrixColumn tbColumn : tbMatrix.getColumnsReadOnly() ) { org.nexml.model.Character xmlCharacter = xmlMatrix.createCharacter(); - setCharacterAttributes(tbColumn, xmlCharacter); + copyCharacterAttributes(tbColumn, xmlCharacter); //coerce the tbMatrix into a character matrix to get its character sets CharacterMatrix tbCharacterMatrix = (CharacterMatrix)tbMatrix; @@ -225,35 +271,18 @@ nexSubset.addThing(nexCharacters.get(i)); } } - } - + } } return xmlMatrix; } - -// XXX doesn't handle the following data types: -// public static final String MATRIX_DATATYPE_NUCLEOTIDE = "Nucleotide"; -// public static final String MATRIX_DATATYPE_DISTANCE = "Distance"; -// public static final String MATRIX_DATATYPE_MIXED = "Mixed"; - @SuppressWarnings("unchecked") - public org.nexml.model.Matrix<?> fromTreeBaseToXml(CharacterMatrix tbMatrix,OTUs xmlOTUs) { - org.nexml.model.Matrix<?> xmlMatrix = null; - if ( tbMatrix instanceof DiscreteMatrix ) { - if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) { - xmlMatrix = fromTreeBaseToXml((StandardMatrix) tbMatrix,xmlOTUs); - } - else { - xmlMatrix = fromTreeBaseToXml((DiscreteMatrix) tbMatrix,xmlOTUs); - } - populateXmlMatrix((org.nexml.model.Matrix<CharacterState>)xmlMatrix,(DiscreteMatrix)tbMatrix); - } - else if ( tbMatrix instanceof ContinuousMatrix ) { - xmlMatrix = fromTreeBaseToXml((ContinuousMatrix) tbMatrix,xmlOTUs); - populateXmlMatrix((org.nexml.model.ContinuousMatrix)xmlMatrix,(ContinuousMatrix)tbMatrix); - } - + /** + * + * @param tbMatrix + * @param xmlMatrix + */ + private void createCharacterSets(CharacterMatrix tbMatrix, 
org.nexml.model.Matrix<?> xmlMatrix) { // here we copy the character sets for all matrix types Set<CharSet> tbCharSets = tbMatrix.getCharSets(); for ( CharSet tbCharSet : tbCharSets ) { @@ -270,25 +299,24 @@ int tbInc = 1; // need to do this to prevent nullpointerexceptions - if ( null != tbColumnRange.getRepeatInterval()) { - tbInc = tbColumnRange.getRepeatInterval(); + Integer tbRepeatInterval = tbColumnRange.getRepeatInterval(); + if ( null != tbRepeatInterval ) { + tbInc = tbRepeatInterval; } // create the equivalent nexml character set - Subset nexSubset = xmlMatrix.createSubset(tbCharSet.getLabel()); + Subset xmlSubset = xmlMatrix.createSubset(tbCharSet.getLabel()); // assign character objects to the subset. Here we get the full list - List<org.nexml.model.Character> nexCharacters = xmlMatrix.getCharacters(); + List<org.nexml.model.Character> xmlCharacters = xmlMatrix.getCharacters(); // now we iterate over the coordinates and assign the nexml characters to the set for ( int i = tbStart; i <= tbStop; i += tbInc ) { - nexSubset.addThing(nexCharacters.get(i)); + xmlSubset.addThing(xmlCharacters.get(i)); } } } - - return xmlMatrix; - } + } /** * @@ -297,135 +325,17 @@ * @param xmlOTUs * @param stateSet */ - private void populateXmlMatrix( - org.nexml.model.Matrix<CharacterState> xmlMatrix, - DiscreteMatrix tbMatrix) { + private void populateDiscreteNexmlMatrix(org.nexml.model.Matrix<CharacterState> xmlMatrix, DiscreteMatrix tbMatrix) { + OTUs xmlOTUs = xmlMatrix.getOTUs(); - List<org.nexml.model.Character> characterList = xmlMatrix.getCharacters(); + List<org.nexml.model.Character> xmlCharacters = xmlMatrix.getCharacters(); + + // iterates over all matrix rows, i.e. 
ntax times for ( MatrixRow tbRow : tbMatrix.getRowsReadOnly() ) { Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly(); OTU xmlOTU = getOTUById(xmlOTUs, tbRow.getTaxonLabel().getId()); - int charIndex = 0; - if ( characterList.size() <= MAX_GRANULAR_NCHAR && xmlOTUs.getAllOTUs().size() <= MAX_GRANULAR_NTAX ) { - for ( MatrixColumn tbColumn : ((CharacterMatrix)tbMatrix).getColumns() ) { - String seq = tbRow.getNormalizedSymbolString(); - xmlMatrix.setSeq(seq, xmlOTU); - org.nexml.model.Character xmlCharacter = characterList.get(charIndex); - MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter); - - attachTreeBaseID ((Annotatable) xmlCell, tbColumn , DiscreteMatrixElement.class); - - //The following is commented out as tbRow.getElements() does not work directly and crashes the loop. - //The above for loop fixes this issue. - /* - for ( MatrixElement tbCell : tbRow.getElements() ) { - org.nexml.model.Character xmlCharacter = characterList.get(charIndex); - MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter); - DiscreteCharState tbState = ((DiscreteMatrixElement)tbCell).getCharState(); - String tbSymbolString = ( null == tbState ) ? "?" 
: tbState.getSymbol().toString(); - CharacterState xmlState = xmlCharacter.getCharacterStateSet().lookupCharacterStateBySymbol(tbSymbolString); - xmlCell.setValue(xmlState); - attachTreeBaseID((Annotatable)xmlCell,tbCell,DiscreteMatrixElement.class); - */ - - for ( RowSegment tbSegment : tbSegments ) { - if ( tbSegment.getStartIndex() <= charIndex && charIndex <= tbSegment.getEndIndex() ) { - //declare variables for row-segment annotations - String title = tbSegment.getTitle(); - String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym(); - String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode(); - String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber(); - String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession(); - String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession(); - String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString(); - String scientificName = tbSegment.getSpecimenTaxonLabelAsString(); - String collector = tbSegment.getSpecimenLabel().getCollector(); - Double latitude = tbSegment.getSpecimenLabel().getLatitude(); - Double longitude = tbSegment.getSpecimenLabel().getLongitude(); - Double elevation = tbSegment.getSpecimenLabel().getElevation(); - String country = tbSegment.getSpecimenLabel().getCountry(); - String state = tbSegment.getSpecimenLabel().getState(); - String locality = tbSegment.getSpecimenLabel().getLocality(); - String notes = tbSegment.getSpecimenLabel().getNotes(); - - //if the value is not null, output the xmlOTU annotation. 
- //DwC refers to the Darwin Core term vocabulary for the associated annotation - if (null != title){ - //output name identifying the data set from which the record was derived - ((Annotatable)xmlCell).addAnnotationValue("DwC:datasetName", Constants.DwCURI, title); - } - if ( null != institutionCode ) { - //output name or acronym of institution that has custody of information referred to in record - ((Annotatable)xmlCell).addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode); - } - if ( null != collectionCode ) { - //output name or code that identifies collection or data set from which record was derived - ((Annotatable)xmlCell).addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode); - } - if ( null != catalogNumber ){ - //output unique (usually) identifier for the record within data set or collection - ((Annotatable)xmlCell).addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber); - } - if ( null != accessionNumber) { - //output a list of genetic sequence information associated with occurrence - ((Annotatable)xmlCell).addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber); - } - if ( null != otherAccessionNumber ) { - //list of previous or alternate fully catalog numbers (i.e. 
Genbank) or human-used identifiers - ((Annotatable)xmlCell).addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber); - } - if ( null != dateSampled ) { - //output date sampled in ISO 8601 format - ((Annotatable)xmlCell).addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled); - } - if ( null != scientificName ) { - //output full scientific name - ((Annotatable)xmlCell).addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName); - } - if ( null != collector ) { - //output names of people associated with recording of original occurrence - ((Annotatable)xmlCell).addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector); - } - if ( null != latitude ) { - //output geographic latitude in decimal degrees using geodeticDatum spatial reference system - ((Annotatable)xmlCell).addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude); - } - if ( null != longitude ) { - //output geographic longitude in decimal degrees using geodeticDatum spatial reference system - ((Annotatable)xmlCell).addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude); - } - if ( null != elevation ) { - //there are two different Darwin Core terms for elevation depending on elevation value - //outputs geographic elevation of sample - if ( elevation >= 0) { - //above local surface in meters - ((Annotatable)xmlCell).addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation); - } - else { - //below local surface in meters - ((Annotatable)xmlCell).addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation); - } - } - if ( null != country ) { - //output country in which location occurs - ((Annotatable)xmlCell).addAnnotationValue("DwC:country", Constants.DwCURI, country); - } - if ( null != state ) { - //output name of next smaller administrative region than country (i.e. 
state, province, region) - ((Annotatable)xmlCell).addAnnotationValue ("DwC:stateProvince", Constants.DwCURI, state); - } - if ( null != locality) { - //output brief description of sample location - ((Annotatable)xmlCell).addAnnotationValue("DwC:locality", Constants.DwCURI, locality); - } - if ( null != notes ) { - //output any additional information about specimen - ((Annotatable)xmlCell).addAnnotationValue("DwC:occurenceRemarks", Constants.DwCURI, notes); - } - } - } - charIndex++; - } + if ( xmlCharacters.size() <= MAX_GRANULAR_NCHAR && xmlOTUs.getAllOTUs().size() <= MAX_GRANULAR_NTAX ) { + populateDiscreteVerboseNexmlMatrix(xmlMatrix,tbMatrix,xmlCharacters,tbRow,tbSegments,xmlOTU); } else { String seq = tbRow.getNormalizedSymbolString(); @@ -439,102 +349,49 @@ } } xmlMatrix.setSeq(seq,xmlOTU); + + // this often only happens once, when the row has only 1 segment + for ( RowSegment tbSegment : tbSegments ) { + copyDarwinCoreAnnotations(tbSegment, xmlOTU); + } } - for ( RowSegment tbSegment : tbSegments ) { - //declare variables for row-segment annotations - String title = tbSegment.getTitle(); - String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym(); - String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode(); - String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber(); - String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession(); - String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession(); - String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString(); - String scientificName = tbSegment.getSpecimenTaxonLabelAsString(); - String collector = tbSegment.getSpecimenLabel().getCollector(); - Double latitude = tbSegment.getSpecimenLabel().getLatitude(); - Double longitude = tbSegment.getSpecimenLabel().getLongitude(); - Double elevation = tbSegment.getSpecimenLabel().getElevation(); - String country = tbSegment.getSpecimenLabel().getCountry(); - String state = 
tbSegment.getSpecimenLabel().getState(); - String locality = tbSegment.getSpecimenLabel().getLocality(); - String notes = tbSegment.getSpecimenLabel().getNotes(); + } + } + + /** + * + * @param xmlMatrix + * @param tbMatrix + * @param xmlCharacterList + * @param tbRow + * @param tbSegments + * @param xmlOTU + */ + private void populateDiscreteVerboseNexmlMatrix( + org.nexml.model.Matrix<CharacterState> xmlMatrix, + DiscreteMatrix tbMatrix, + List<org.nexml.model.Character> xmlCharacterList,MatrixRow tbRow, + Set<RowSegment> tbSegments, OTU xmlOTU) { - //if the value is not null, output the xmlOTU annotation. - //DwC refers to the Darwin Core term vocabulary for the associated annotation - if (null != title){ - //output name identifying the data set from which the record was derived - xmlOTU.addAnnotationValue("DwC:datasetName", Constants.DwCURI, title); + // iterates over all characters, i.e. nchar times + int charIndex = 0; + String seq = tbRow.getSymbolString(); + for ( MatrixColumn tbColumn : ((CharacterMatrix)tbMatrix).getColumns() ) { + + org.nexml.model.Character xmlCharacter = xmlCharacterList.get(charIndex); + MatrixCell<CharacterState> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter); + String value = "" + seq.charAt(charIndex); + CharacterState xmlState = xmlMatrix.parseSymbol(value); + xmlCell.setValue(xmlState); + attachTreeBaseID ((Annotatable) xmlCell, tbColumn , DiscreteMatrixElement.class); + + for ( RowSegment tbSegment : tbSegments ) { + if ( tbSegment.getStartIndex() <= charIndex && charIndex <= tbSegment.getEndIndex() ) { + copyDarwinCoreAnnotations(tbSegment, (Annotatable)xmlCell); } - if ( null != institutionCode ) { - //output name or acronym of institution that has custody of information referred to in record - xmlOTU.addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode); - } - if ( null != collectionCode ) { - //output name or code that identifies collection or data set from which record was derived - 
xmlOTU.addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode); - } - if ( null != catalogNumber ){ - //output unique (usually) identifier for the record within data set or collection - xmlOTU.addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber); - } - if ( null != accessionNumber) { - //output a list of genetic sequence information associated with occurrence - xmlOTU.addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber); - } - if ( null != otherAccessionNumber ) { - //list of previous or alternate fully catalog numbers (i.e. Genbank) or human-used identifiers - xmlOTU.addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber); - } - if ( null != dateSampled ) { - //output date sampled in ISO 8601 format - xmlOTU.addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled); - } - if ( null != scientificName ) { - //output full scientific name - xmlOTU.addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName); - } - if ( null != collector ) { - //output names of people associated with recording of original occurrence - xmlOTU.addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector); - } - if ( null != latitude ) { - //output geographic latitude in decimal degrees using geodeticDatum spatial reference system - xmlOTU.addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude); - } - if ( null != longitude ) { - //output geographic longitude in decimal degrees using geodeticDatum spatial reference system - xmlOTU.addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude); - } - if ( null != elevation ) { - //there are two different Darwin Core terms for elevation depending on elevation value - //outputs geographic elevation of sample - if ( elevation >= 0) { - //above local surface in meters - xmlOTU.addAnnotationValue("DwC:verbatimElevation", Constants.DwCURI, elevation); - } - else { - //below local surface in meters - 
xmlOTU.addAnnotationValue("DwC:verbatimDepth", Constants.DwCURI, elevation); - } - } - if ( null != country ) { - //output country in which location occurs - xmlOTU.addAnnotationValue("DwC:country", Constants.DwCURI, country); - } - if ( null != state ) { - //output name of next smaller administrative region than country (i.e. state, province, region) - xmlOTU.addAnnotationValue ("DwC:stateProvince", Constants.DwCURI, state); - } - if ( null != locality) { - //output brief description of sample location - xmlOTU.addAnnotationValue("DwC:locality", Constants.DwCURI, locality); - } - if ( null != notes ) { - //output any additional information about specimen - xmlOTU.addAnnotationValue("DwC:occurenceRemarks", Constants.DwCURI, notes); - } } - } + charIndex++; + } } /** @@ -542,7 +399,7 @@ * @param xmlMatrix * @param tbMatrix */ - private void populateXmlMatrix(org.nexml.model.ContinuousMatrix xmlMatrix, + private void populateContinuousNexmlMatrix(org.nexml.model.ContinuousMatrix xmlMatrix, ContinuousMatrix tbMatrix) { List<org.nexml.model.Character> characterList = xmlMatrix.getCharacters(); OTUs xmlOTUs = xmlMatrix.getOTUs(); @@ -563,100 +420,100 @@ } Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly(); for ( RowSegment tbSegment : tbSegments ) { - //declare variables for row-segment annotations - String title = tbSegment.getTitle(); - String institutionCode = tbSegment.getSpecimenLabel().getInstAcronym(); - String collectionCode = tbSegment.getSpecimenLabel().getCollectionCode(); - String catalogNumber = tbSegment.getSpecimenLabel().getCatalogNumber(); - String accessionNumber = tbSegment.getSpecimenLabel().getGenBankAccession(); - String otherAccessionNumber = tbSegment.getSpecimenLabel().getOtherAccession(); - String dateSampled = tbSegment.getSpecimenLabel().getSampleDateString(); - String scientificName = tbSegment.getSpecimenTaxonLabelAsString(); - String collector = tbSegment.getSpecimenLabel().getCollector(); - Double latitude = 
tbSegment.getSpecimenLabel().getLatitude(); - Double longitude = tbSegment.getSpecimenLabel().getLongitude(); - Double elevation = tbSegment.getSpecimenLabel().getElevation(); - String country = tbSegment.getSpecimenLabel().getCountry(); - String state = tbSegment.getSpecimenLabel().getState(); - String locality = tbSegment.getSpecimenLabel().getLocality(); - String notes = tbSegment.getSpecimenLabel().getNotes(); - - //if the value is not null, output the xmlOTU annotation. - //DwC refers to the Darwin Core term vocabulary for the associated annotation - if (null != title){ - //output name identifying the data set from which the record was derived - xmlOTU.addAnnotationValue("DwC:datasetName", Constants.DwCURI, title); - } - if ( null != institutionCode ) { - //output name or acronym of institution that has custody of information referred to in record - xmlOTU.addAnnotationValue("DwC:institutionCode", Constants.DwCURI, institutionCode); - } - if ( null != collectionCode ) { - //output name or code that identifies collection or data set from which record was derived - xmlOTU.addAnnotationValue ("DwC:collectionCode", Constants.DwCURI, collectionCode); - } - if ( null != catalogNumber ){ - //output unique (usually) identifier for the record within data set or collection - xmlOTU.addAnnotationValue("DwC:catalogNumber", Constants.DwCURI, catalogNumber); - } - if ( null != accessionNumber) { - //output a list of genetic sequence information associated with occurrence - xmlOTU.addAnnotationValue("DwC:associatedSequences", Constants.DwCURI, accessionNumber); - } - if ( null != otherAccessionNumber ) { - //list of previous or alternate fully catalog numbers (i.e. 
Genbank) or human-used identifiers - xmlOTU.addAnnotationValue("DwC:otherCatalogNumbers", Constants.DwCURI, otherAccessionNumber); - } - if ( null != dateSampled ) { - //output date sampled in ISO 8601 format - xmlOTU.addAnnotationValue("DwC:eventDate", Constants.DwCURI, dateSampled); - } - if ( null != scientificName ) { - //output full scientific name - xmlOTU.addAnnotationValue("DwC:scientificName", Constants.DwCURI, scientificName); - } - if ( null != collector ) { - //output names of people associated with recording of original occurrence - xmlOTU.addAnnotationValue("DwC:recordedBy", Constants.DwCURI, collector); - } - if ( null != latitude ) { - //output geographic latitude in decimal degrees using geodeticDatum spatial reference system - xmlOTU.addAnnotationValue("DwC:decimalLatitude", Constants.DwCURI, latitude); - } - if ( null != longitude ) { - //output geographic longitude in decimal degrees using geodeticDatum spatial reference system - xmlOTU.addAnnotationValue("DwC:decimalLongitude", Constants.DwCURI, longitude); - } - if ( null != elevation ) { - //there are two different Darwin Core terms for elevation depending on elevation value - //outputs geographic elevation of sample - if ( elevation >= 0) { - //above local surface in meters - xmlOTU.addAnnotationValue("D... [truncated message content] |
From: <rv...@us...> - 2012-02-04 00:10:20
|
Revision: 1052 http://treebase.svn.sourceforge.net/treebase/?rev=1052&view=rev Author: rvos Date: 2012-02-04 00:10:14 +0000 (Sat, 04 Feb 2012) Log Message: ----------- Now checks whether Darwin Core semantic annotation values are empty strings (which, it appears, they might be). Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-03 23:47:25 UTC (rev 1051) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-04 00:10:14 UTC (rev 1052) @@ -8,6 +8,7 @@ import java.util.Set; import org.cipres.treebase.Constants; +import org.cipres.treebase.TreebaseUtil; import org.cipres.treebase.domain.matrix.CharSet; import org.cipres.treebase.domain.matrix.CharacterMatrix; import org.cipres.treebase.domain.matrix.ColumnRange; @@ -451,7 +452,7 @@ for ( String predicate : predicateToObjectMap.keySet() ) { String objectString = predicateToObjectMap.get(predicate); - if ( null != objectString ) { + if ( ! TreebaseUtil.isEmpty(objectString) ) { xmlAnnotatable.addAnnotationValue(predicate, Constants.DwCURI, objectString); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-02-04 09:26:36
|
Revision: 1054 http://treebase.svn.sourceforge.net/treebase/?rev=1054&view=rev Author: rvos Date: 2012-02-04 09:26:30 +0000 (Sat, 04 Feb 2012) Log Message: ----------- Setting matrix to always compact Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-04 00:23:33 UTC (rev 1053) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-04 09:26:30 UTC (rev 1054) @@ -43,8 +43,8 @@ public class NexmlMatrixWriter extends NexmlObjectConverter { - private static final int MAX_GRANULAR_NCHAR = 1000; - private static final int MAX_GRANULAR_NTAX = 30; + private static final int MAX_GRANULAR_NCHAR = 0; + private static final int MAX_GRANULAR_NTAX = 0; /** * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-02-05 15:23:16
|
Revision: 1055 http://treebase.svn.sourceforge.net/treebase/?rev=1055&view=rev Author: rvos Date: 2012-02-05 15:23:09 +0000 (Sun, 05 Feb 2012) Log Message: ----------- Now uses org.nexml.model.MatrixRow objects to attach Darwin Core annotations Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-04 09:26:30 UTC (rev 1054) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-05 15:23:09 UTC (rev 1055) @@ -30,6 +30,7 @@ import org.cipres.treebase.domain.taxon.SpecimenLabel; import org.cipres.treebase.domain.taxon.TaxonLabelHome; import org.nexml.model.Annotatable; +import org.nexml.model.Annotation; import org.nexml.model.CategoricalMatrix; import org.nexml.model.CharacterState; import org.nexml.model.CharacterStateSet; @@ -340,6 +341,8 @@ } else { String seq = tbRow.getNormalizedSymbolString(); + + // In NeXML, 'standard' data needs to be space-separated if ( tbMatrix.getDataType().getDescription().equals(MatrixDataType.MATRIX_DATATYPE_STANDARD) ) { StringBuilder sb = new StringBuilder(); for ( int i = 0; i < seq.length(); i++ ) { @@ -348,19 +351,23 @@ sb.append(' '); } } - } + } xmlMatrix.setSeq(seq,xmlOTU); // this often only happens once, when the row has only 1 segment for ( RowSegment tbSegment : tbSegments ) { - copyDarwinCoreAnnotations(tbSegment, xmlOTU); + org.nexml.model.MatrixRow<CharacterState> xmlRow = xmlMatrix.getRowObject(xmlOTU); + Annotation xmlSegment = xmlRow.addAnnotationValue("tb:rowSegment", Constants.TBTermsURI, new String()); + xmlSegment.addAnnotationValue("tb:startIndex", Constants.TBTermsURI, tbSegment.getStartIndex()); + 
xmlSegment.addAnnotationValue("tb:endIndex", Constants.TBTermsURI, tbSegment.getEndIndex()); + copyDarwinCoreAnnotations(tbSegment, xmlSegment); } } } } /** - * + * XXX this never executes, we always make compact matrices - RAV 5/2/2012 * @param xmlMatrix * @param tbMatrix * @param xmlCharacterList @@ -421,7 +428,11 @@ } Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly(); for ( RowSegment tbSegment : tbSegments ) { - copyDarwinCoreAnnotations(tbSegment,xmlOTU); + org.nexml.model.MatrixRow<Double> xmlRow = xmlMatrix.getRowObject(xmlOTU); + Annotation xmlSegment = xmlRow.addAnnotationValue("tb:rowSegment", Constants.TBTermsURI, new String()); + xmlSegment.addAnnotationValue("tb:startIndex", Constants.TBTermsURI, tbSegment.getStartIndex()); + xmlSegment.addAnnotationValue("tb:endIndex", Constants.TBTermsURI, tbSegment.getEndIndex()); + copyDarwinCoreAnnotations(tbSegment,xmlSegment); } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rv...@us...> - 2012-02-05 19:24:35
|
Revision: 1056 http://treebase.svn.sourceforge.net/treebase/?rev=1056&view=rev Author: rvos Date: 2012-02-05 19:24:29 +0000 (Sun, 05 Feb 2012) Log Message: ----------- Now attaches MatrixRow IDs to <row> elements Modified Paths: -------------- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java Modified: trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java =================================================================== --- trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-05 15:23:09 UTC (rev 1055) +++ trunk/treebase-core/src/main/java/org/cipres/treebase/domain/nexus/nexml/NexmlMatrixWriter.java 2012-02-05 19:24:29 UTC (rev 1056) @@ -353,10 +353,11 @@ } } xmlMatrix.setSeq(seq,xmlOTU); + org.nexml.model.MatrixRow<CharacterState> xmlRow = xmlMatrix.getRowObject(xmlOTU); + attachTreeBaseID(xmlRow, tbRow, MatrixRow.class); // this often only happens once, when the row has only 1 segment - for ( RowSegment tbSegment : tbSegments ) { - org.nexml.model.MatrixRow<CharacterState> xmlRow = xmlMatrix.getRowObject(xmlOTU); + for ( RowSegment tbSegment : tbSegments ) { Annotation xmlSegment = xmlRow.addAnnotationValue("tb:rowSegment", Constants.TBTermsURI, new String()); xmlSegment.addAnnotationValue("tb:startIndex", Constants.TBTermsURI, tbSegment.getStartIndex()); xmlSegment.addAnnotationValue("tb:endIndex", Constants.TBTermsURI, tbSegment.getEndIndex()); @@ -427,8 +428,9 @@ xmlMatrix.setSeq(seq,xmlOTU); } Set<RowSegment> tbSegments = tbRow.getSegmentsReadOnly(); - for ( RowSegment tbSegment : tbSegments ) { - org.nexml.model.MatrixRow<Double> xmlRow = xmlMatrix.getRowObject(xmlOTU); + org.nexml.model.MatrixRow<Double> xmlRow = xmlMatrix.getRowObject(xmlOTU); + attachTreeBaseID(xmlRow, tbRow, MatrixRow.class); + for ( RowSegment tbSegment : tbSegments ) { Annotation xmlSegment = xmlRow.addAnnotationValue("tb:rowSegment", 
Constants.TBTermsURI, new String()); xmlSegment.addAnnotationValue("tb:startIndex", Constants.TBTermsURI, tbSegment.getStartIndex()); xmlSegment.addAnnotationValue("tb:endIndex", Constants.TBTermsURI, tbSegment.getEndIndex()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |