From: Suzanna L. <su...@us...> - 2003-04-23 22:15:51
|
Update of /cvsroot/gmod/apollo/src/java/apollo/analysis/filter In directory sc8-pr-cvs1:/tmp/cvs-serv6176/apollo/analysis/filter Modified Files: BlastFilter.java BlastFilterPanel.java Coverage.java Log Message: filtering of BLAST results now working on expect, score, percent_identity, and separating of HSPs into individual hits if so desired (for genome to genome analysis). few small changes to the gui (still needs history to be added). also moved PropertyScheme out of Config and into Style. This way it persists between opens. Before it was creating a new PropertyScheme and reparsing the tiers.dat file every single time something new was loaded. Needed to do this in preparation for layering of BLAST results which does not want to replace the current set of tiers and types, but merely extend them. Now that the PropertyScheme is retained this is possible. Index: BlastFilter.java =================================================================== RCS file: /cvsroot/gmod/apollo/src/java/apollo/analysis/filter/BlastFilter.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** BlastFilter.java 23 Apr 2003 00:18:32 -0000 1.1 --- BlastFilter.java 23 Apr 2003 22:15:10 -0000 1.2 *************** *** 27,30 **** --- 27,38 ---- FeatureSetI reverse_analysis = getAnalysis (curation, analysis_type, -1); + // may delete spans from the hit, + // but not the hit itself + // THIS MUST PRECEDE SPLITTING!! + if (filter_input.useCoincidence()) { + int percent_overlap = filter_input.getCoincidence(); + cleanUpCoincidents (forward_analysis, percent_overlap); + cleanUpCoincidents (reverse_analysis, percent_overlap); + } if (filter_input.useAutonomousHSPs()) { promoteHSPsToHits(forward_analysis); *************** *** 51,60 **** private void processHits (FeatureSetI analysis, AnalysisInput filter_input) { - boolean remove_coincidents = filter_input.useCoincidence(); boolean use_expect = filter_input.useExpect(); ! 
double min_expect = filter_input.getMinExpect(); if (use_expect) ! System.out.println ("Removing hits with expect > " + min_expect); int hit_index = 0; --- 59,75 ---- private void processHits (FeatureSetI analysis, AnalysisInput filter_input) { boolean use_expect = filter_input.useExpect(); ! double max_expect = filter_input.getMaxExpect(); ! boolean use_score = filter_input.useScore(); ! int min_score = filter_input.getMinScore(); ! boolean use_identity = filter_input.useIdentity(); ! int min_identity = filter_input.getMinIdentity(); if (use_expect) ! System.out.println ("Removing hits with expect > " + max_expect); ! if (use_score) ! System.out.println ("Removing hits with score < " + min_score); ! if (use_identity) ! System.out.println ("Removing hits with identity < " + min_identity); int hit_index = 0; *************** *** 64,88 **** (FeatureSetI) analysis.getFeatureAt (hit_index + 1) : null); ! if (use_expect && hit.getScore("expect") > min_expect) { ! /** A minimum expectation value may also be applied * as a simple threshhold value. Spans with an expect * greater than this value are discarded. <p> * This value is set in the preferences file as ! * -min_expect 1.0e-4 */ removeHit (hit, analysis); - System.out.println ("Deleted high expect hit at " + - hit.getStart() + "-" + - hit.getEnd() + " expect=" + - hit.getScore("expect")); - } ! else { ! // may delete spans from the hit, ! // but not the hit itself ! // THIS MUST PRECEDE SPLITTING!! ! if (remove_coincidents) ! cleanUpCoincidents (hit, filter_input.getCoincidence()); ! /* not just yet, but soon Vector ordered_hits = splitHit (hit, hits, analysis, filter_input); --- 79,98 ---- (FeatureSetI) analysis.getFeatureAt (hit_index + 1) : null); ! if (use_expect && hit.getScore("expect") > max_expect) { ! /** A maximum expectation value may also be applied * as a simple threshhold value. Spans with an expect * greater than this value are discarded. <p> * This value is set in the preferences file as ! 
* -max_expect 1.0e-4 */ removeHit (hit, analysis); } ! else if (use_score && hit.getScore() < min_score) { ! removeHit (hit, analysis); ! } ! else if (use_identity && lowIdentity(hit, min_identity)) { ! removeHit (hit, analysis); ! } ! else { /* not just yet, but soon Vector ordered_hits = splitHit (hit, hits, analysis, filter_input); *************** *** 283,286 **** --- 293,305 ---- } + private void debugFeature (SeqFeatureI sf, String prefix) { + System.out.println (prefix + + sf.getStart() + "-" + + sf.getEnd() + " expect=" + + sf.getScore("expect") + " score=" + + sf.getScore()); + + } + private FeatureSetI getClosestOnGenomic (SeqFeatureI span, Vector split_hits) { FeatureSetI bin_hit = null; *************** *** 332,341 **** private FeatureSetI makeNewHit (FeatureSetI old_hit, FeaturePairI span) { FeatureSetI new_hit = new FeatureSet (); new_hit.setRefSequence (old_hit.getRefSequence()); new_hit.setType (old_hit.getType()); new_hit.setAlignedSequence (old_hit.getAlignedSequence()); ! addToHit (span, new_hit); ! return new_hit; } --- 351,364 ---- private FeatureSetI makeNewHit (FeatureSetI old_hit, FeaturePairI span) { FeatureSetI new_hit = new FeatureSet (); + new_hit.setStrand (old_hit.getStrand()); + new_hit.setName (old_hit.getName()); new_hit.setRefSequence (old_hit.getRefSequence()); new_hit.setType (old_hit.getType()); new_hit.setAlignedSequence (old_hit.getAlignedSequence()); ! new_hit.setProgramName(old_hit.getProgramName()); ! new_hit.setDatabase (old_hit.getDatabase()); addToHit (span, new_hit); ! if (new_hit.getScore("expect") < 0) ! debugFeature (span,"Why is expect not set for "); return new_hit; } *************** *** 343,362 **** private void addToHit (SeqFeatureI span, FeatureSetI hit) { hit.addFeature (span, true); ! hit.setScore (-1); ! setHitScore (hit); ! } ! ! private void setHitScore (FeatureSetI hit) { int score; - for (int i = 0; i < hit.size (); i++) { ! SeqFeatureI span = (SeqFeatureI) hit.getFeatureAt(i); ! 
if (span.getScore() > hit.getScore() ! || hit.getScore() == -1 ! || ( span.getScore() == hit.getScore() ! && span.getScore("expect") < hit.getScore("expect"))) { ! hit.setScore (span.getScore()); ! hit.addScore ("expect", span.getScore("expect")); ! hit.addScore ("probability", span.getScore("probability")); } } --- 366,377 ---- private void addToHit (SeqFeatureI span, FeatureSetI hit) { hit.addFeature (span, true); ! double hit_expect = hit.getScore ("expect"); int score; for (int i = 0; i < hit.size (); i++) { ! SeqFeatureI hsp = (SeqFeatureI) hit.getFeatureAt(i); ! if (hit_expect < 0 || hsp.getScore("expect") < hit_expect) { ! hit.setScore (hsp.getScore()); ! hit.addScore ("expect", hsp.getScore("expect")); ! hit.addScore ("probability", hsp.getScore("probability")); } } *************** *** 406,436 **** } ! private void cleanUpCoincidents (FeatureSetI hit, int coincidence) { ! /* handle spans that overlap to a significant degree */ ! int i; ! SeqFeatureI span1, span2; ! boolean cleanup = true; ! while (cleanup) { ! cleanup = false; ! i = 0; ! while ((i + 1) < hit.size() && !cleanup) { ! span1 = (FeatureSetI) hit.getFeatureAt (i); ! span2 = (FeatureSetI) hit.getFeatureAt (i + 1); ! ! cleanup = spansOverlap (hit, span1, span2, coincidence); ! if (cleanup) { ! removeWeakerSpan (hit, span1, span2); ! System.out.println ("Deleted coincident span at " + ! hit.getStart() + "-" + ! hit.getEnd() + " expect=" + ! hit.getScore("expect")); ! i = 0; ! } ! else { ! i++; } } } } --- 421,467 ---- } ! protected boolean lowIdentity (FeatureSetI hit, int min_identity) { ! int span_cnt = hit.size(); ! for (int i = span_cnt - 1; i >= 0; i--) { ! FeaturePairI span = (FeaturePairI) hit.getFeatureAt(i); ! if (identity(span) < min_identity) { ! hit.deleteFeature (span); ! } ! } ! return hit.size() <= 0; ! } ! private void cleanUpCoincidents (FeatureSetI analysis, int coincidence) { ! /* handle spans that overlap to a significant degree */ ! int hit_index = 0; ! 
while (hit_index < analysis.size ()) { ! FeatureSetI hit = (FeatureSetI) analysis.getFeatureAt (hit_index); ! int i; ! SeqFeatureI span1, span2; ! boolean cleanup = true; ! while (cleanup) { ! cleanup = false; ! i = 0; ! while ((i + 1) < hit.size() && !cleanup) { ! span1 = (FeaturePairI) hit.getFeatureAt (i); ! span2 = (FeaturePairI) hit.getFeatureAt (i + 1); ! ! cleanup = spansOverlap (hit, span1, span2, coincidence); ! if (cleanup) { ! removeWeakerSpan (hit, span1, span2); ! i = 0; ! } ! else { ! i++; ! } } } + if (hit.size() == 0) { + analysis.deleteFeature (hit); + } + else { + hit_index++; + } } } *************** *** 512,532 **** FeatureSetI hit = (FeatureSetI) region.elementAt (j); SequenceI align_seq = hit.getAlignedSequence(); - boolean shadow = false; int k = j + 1; String hit_name = align_seq.getDisplayId(); FeatureSetI close_hit; ! while (!shadow && k < region.size()) { close_hit = (FeatureSetI) region.elementAt (k); SequenceI close_seq = close_hit.getAlignedSequence(); /* same sequence, but opposite strand */ - System.out.println ("Comparing " + - hit.getStart() + "-" + - hit.getEnd() + " to " + - close_hit.getStart() + "-" + - close_hit.getEnd()); if (hit_name.equals (close_seq.getDisplayId()) && (hit.getStrand() != close_hit.getStrand())) { - shadow = true; // assuming that the region list is presorted // with the stronger hit first --- 543,556 ---- FeatureSetI hit = (FeatureSetI) region.elementAt (j); SequenceI align_seq = hit.getAlignedSequence(); int k = j + 1; String hit_name = align_seq.getDisplayId(); FeatureSetI close_hit; ! 
while (k < region.size()) { close_hit = (FeatureSetI) region.elementAt (k); SequenceI close_seq = close_hit.getAlignedSequence(); /* same sequence, but opposite strand */ if (hit_name.equals (close_seq.getDisplayId()) && (hit.getStrand() != close_hit.getStrand())) { // assuming that the region list is presorted // with the stronger hit first *************** *** 536,544 **** else removeHit (close_hit, reverse_analysis); - System.out.println ("Deleted shadow hit at " + - close_hit.getStart() + "-" + - close_hit.getEnd() + " expect=" + - close_hit.getScore("expect")); - } else { --- 560,563 ---- *************** *** 559,566 **** curation.getResults().getReverseSet()); ! for (int i = 0; i < analyses.size(); i++) { FeatureSetI analysis = (FeatureSetI) analyses.getFeatureAt(i); - System.out.println ("Want type " + type + " comparing to " + - analysis.getType()); the_one = ((analysis.getType() != null && analysis.getType().equals (type)) ? --- 578,583 ---- curation.getResults().getReverseSet()); ! for (int i = 0; i < analyses.size() && the_one == null; i++) { FeatureSetI analysis = (FeatureSetI) analyses.getFeatureAt(i); the_one = ((analysis.getType() != null && analysis.getType().equals (type)) ? Index: BlastFilterPanel.java =================================================================== RCS file: /cvsroot/gmod/apollo/src/java/apollo/analysis/filter/BlastFilterPanel.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** BlastFilterPanel.java 23 Apr 2003 00:18:32 -0000 1.1 --- BlastFilterPanel.java 23 Apr 2003 22:15:11 -0000 1.2 *************** *** 15,18 **** --- 15,19 ---- import apollo.datamodel.GenomicRange; import apollo.dataadapter.analysis.AnalysisInput; + import apollo.gui.Config; /** *************** *** 24,43 **** private JPanel filterConfigurationPanel; private JButton showFilterConfiguration; ! 
private JTextField min_expect = new JTextField ("4"); private JCheckBox use_expect = new JCheckBox ("Limit by expect threshold", true); private JTextField min_score = new JTextField ("50"); private JCheckBox use_score = new JCheckBox ("Limit by score threshold", false); private JTextField min_length = new JTextField ("50"); ! private JToggleButton use_length = new JCheckBox ("Limit by percent of sequence length", false); ! private JTextField min_coincidence = new JTextField ("100"); ! private JCheckBox use_coincidence = new JCheckBox ("Remove Overlapping HSPs ", true); private JTextField max_coverage = new JTextField ("10"); ! private JCheckBox limit_coverage = new JCheckBox ("Limit depth of coverage", true); private JTextField word_size = new JTextField ("2"); private JTextField bitscore_threshold = new JTextField ("15"); ! private JCheckBox remove_lowinfo = new JCheckBox ("Remove simple repeat HSPs", true); ! private JCheckBox remove_shadows = new JCheckBox("Remove shadows", true); ! private JCheckBox split_dups = new JCheckBox("Split duplicated alignments separate hits", true); ! private JCheckBox split_HSPs = new JCheckBox("Treat separate HSPs as autonomous hits", false); public BlastFilterPanel () { --- 25,46 ---- private JPanel filterConfigurationPanel; private JButton showFilterConfiguration; ! private JTextField min_expect = new JTextField ("1.0e-4"); private JCheckBox use_expect = new JCheckBox ("Limit by expect threshold", true); private JTextField min_score = new JTextField ("50"); private JCheckBox use_score = new JCheckBox ("Limit by score threshold", false); + private JTextField min_identity = new JTextField ("50"); + private JCheckBox use_identity = new JCheckBox ("Limit by percent identity", false); private JTextField min_length = new JTextField ("50"); ! private JToggleButton use_length = new JCheckBox ("Limit by percent of mRNA or protein sequence length", false); ! private JTextField min_coincidence = new JTextField ("80"); ! 
private JCheckBox use_coincidence = new JCheckBox ("Remove Overlapping HSPs within hits", false); private JTextField max_coverage = new JTextField ("10"); ! private JCheckBox limit_coverage = new JCheckBox ("Limit depth of coverage", false); private JTextField word_size = new JTextField ("2"); private JTextField bitscore_threshold = new JTextField ("15"); ! private JCheckBox remove_lowinfo = new JCheckBox ("Remove simple repeat HSPs", false); ! private JCheckBox remove_shadows = new JCheckBox("Remove shadowing hits on opposite strand", true); ! private JCheckBox split_dups = new JCheckBox("Split tandemly duplicated alignments into separate hits", false); ! private JCheckBox split_HSPs = new JCheckBox("Treat each HSPs as separate autonomous hits (for genomic to genomic)", true); public BlastFilterPanel () { *************** *** 49,80 **** int row = 0; ! min_expect.setPreferredSize(new Dimension(50,15)); ! add(min_expect, makeConstraintAt(0,row,1)); ! add(use_expect, makeConstraintAt(1,row++,1)); ! min_score.setPreferredSize(new Dimension(50,15)); ! add(min_score, makeConstraintAt(0,row,1)); ! add(use_score, makeConstraintAt(1,row++,1)); ! min_length.setPreferredSize(new Dimension(50,15)); ! add(min_length, makeConstraintAt(0,row,1)); ! add(use_length, makeConstraintAt(1,row++,1)); ! min_coincidence.setPreferredSize(new Dimension(50,15)); ! add(min_coincidence, makeConstraintAt(0,row,1)); ! add(use_coincidence, makeConstraintAt(1,row++,1)); ! max_coverage.setPreferredSize(new Dimension(50,15)); ! add(max_coverage, makeConstraintAt(0,row,1)); ! add(limit_coverage, makeConstraintAt(1,row++,1)); ! word_size.setPreferredSize(new Dimension(50,15)); ! add(word_size, makeConstraintAt(0,row,1)); ! bitscore_threshold.setPreferredSize(new Dimension(50,15)); ! add(bitscore_threshold, makeConstraintAt(1,row,1)); ! 
add(remove_lowinfo, makeConstraintAt(2,row++,1)); - add(remove_shadows, makeConstraintAt(0,row++,1)); add(split_dups, makeConstraintAt(0,row++,1)); add(split_HSPs, makeConstraintAt(0,row++,1)); --- 52,90 ---- int row = 0; ! min_expect.setPreferredSize(new Dimension(80,15)); ! add(use_expect, makeConstraintAt(0,row,1)); ! add(min_expect, makeConstraintAt(1,row,1)); ! add(makeJLabelWithFont("maximum expect value"), makeConstraintAt(2,row++,1)); ! min_score.setPreferredSize(new Dimension(80,15)); ! add(use_score, makeConstraintAt(0,row,1)); ! add(min_score, makeConstraintAt(1,row++,1)); ! min_identity.setPreferredSize(new Dimension(80,15)); ! add(use_identity, makeConstraintAt(0,row,1)); ! add(min_identity, makeConstraintAt(1,row++,1)); ! min_length.setPreferredSize(new Dimension(80,15)); ! add(use_length, makeConstraintAt(0,row,1)); ! add(min_length, makeConstraintAt(1,row++,1)); ! min_coincidence.setPreferredSize(new Dimension(80,15)); ! add(use_coincidence, makeConstraintAt(0,row,1)); ! add(min_coincidence, makeConstraintAt(1,row++,1)); ! ! max_coverage.setPreferredSize(new Dimension(80,15)); ! add(limit_coverage, makeConstraintAt(0,row,1)); ! add(max_coverage, makeConstraintAt(1,row++,1)); ! word_size.setPreferredSize(new Dimension(40,15)); ! bitscore_threshold.setPreferredSize(new Dimension(40,15)); ! add(remove_lowinfo, makeConstraintAt(0,row,1)); ! add(word_size, makeConstraintAt(1,row,1)); ! add(makeJLabelWithFont("word size"), makeConstraintAt(2,row,1)); ! add(bitscore_threshold, makeConstraintAt(3,row,1)); ! add(makeJLabelWithFont("maximum bits"), makeConstraintAt(4,row++,1)); add(split_dups, makeConstraintAt(0,row++,1)); + add(remove_shadows, makeConstraintAt(0,row++,1)); add(split_HSPs, makeConstraintAt(0,row++,1)); *************** *** 93,96 **** --- 103,108 ---- stateInfo.setProperty("min_score", min_score.getText()); stateInfo.setProperty("use_score", use_score.isSelected() ? 
"true" : "false"); + stateInfo.setProperty("min_identity", min_identity.getText()); + stateInfo.setProperty("use_identity", use_identity.isSelected() ? "true" : "false"); stateInfo.setProperty("min_length", min_length.getText()); stateInfo.setProperty("use_length", use_length.isSelected() ? "true" : "false"); *************** *** 143,149 **** public void setInputs(AnalysisInput inputs) { if (use_expect.isSelected()) ! inputs.setMinExpect(min_expect.getText()); else ! inputs.setMinExpect(apollo.dataadapter.analysis.AnalysisInput.NO_LIMIT); if (use_score.isSelected()) --- 155,161 ---- public void setInputs(AnalysisInput inputs) { if (use_expect.isSelected()) ! inputs.setMaxExpect(min_expect.getText()); else ! inputs.setMaxExpect(apollo.dataadapter.analysis.AnalysisInput.NO_LIMIT); if (use_score.isSelected()) *************** *** 152,155 **** --- 164,172 ---- inputs.setMinScore(apollo.dataadapter.analysis.AnalysisInput.NO_LIMIT); + if (use_identity.isSelected()) + inputs.setMinIdentity(min_identity.getText()); + else + inputs.setMinIdentity(apollo.dataadapter.analysis.AnalysisInput.NO_LIMIT); + if (use_length.isSelected()) inputs.setMinLength(min_length.getText()); *************** *** 197,200 **** --- 214,232 ---- return gbc; }//end makeConstraintAt + + private JLabel makeJLabelWithFont(String label) { + String l; + if (Config.isUnix()) + l = "<html><FONT FACE=Geneva,Arial,Helvetica size=1 color=black><B>"+label+"</B>"; + else + l = "<html><FONT FACE=Geneva,Arial,Helvetica color=black><B>"+label+"</B>"; + JLabel jLabel = new JLabel(l); + // I seem to get the best results if I set both preferred and max. + // This might need more tweeking. I didnt think max would be needed + // with the horizontal glue added below, but it makes a difference. 
+ jLabel.setPreferredSize(new Dimension(120,40)); + jLabel.setMaximumSize(new Dimension(200,70)); + return jLabel; + } } Index: Coverage.java =================================================================== RCS file: /cvsroot/gmod/apollo/src/java/apollo/analysis/filter/Coverage.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Coverage.java 23 Apr 2003 00:18:32 -0000 1.1 --- Coverage.java 23 Apr 2003 22:15:11 -0000 1.2 *************** *** 15,20 **** public static Vector sortRegions (FeatureSetI forward_analysis, FeatureSetI reverse_analysis) { ! Vector remainders = (Vector) forward_analysis.getFeatures().clone(); ! remainders.addAll ((Vector) reverse_analysis.getFeatures().clone()); Vector regions = new Vector (); System.out.println ("hits on forward: " + forward_analysis.size() + --- 15,22 ---- public static Vector sortRegions (FeatureSetI forward_analysis, FeatureSetI reverse_analysis) { ! ! Vector remainders = new Vector(); ! remainders.addAll (forward_analysis.getFeatures()); ! remainders.addAll (reverse_analysis.getFeatures()); Vector regions = new Vector (); System.out.println ("hits on forward: " + forward_analysis.size() + |