From: <pcb...@us...> - 2013-03-26 22:20:49
|
Revision: 4444 http://sourceforge.net/p/proteowizard/code/4444 Author: pcbrefugee Date: 2013-03-26 22:20:46 +0000 (Tue, 26 Mar 2013) Log Message: ----------- [--filter documentation] Combed through code, old emails, google, etc to demystify the various filters presented by msconvert and msaccess etc. Updated the online help, including a new page just for filter information for ease of maintenance as new filters are added in future. Added --help option to msconvert and msaccess and family to show deeper detail in filters - the default oops-no-arguments --filter help text is now actually more terse than before for navigability, with information on how to get the greater detail by using --help. Modified Paths: -------------- trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.cpp trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp trunk/pwiz/pwiz_tools/commandline/msconvert.cpp trunk/pwiz/pwiz_tools/common/MSDataAnalyzerApplication.cpp trunk/web/tools/msaccess.html trunk/web/tools/msconvert.html trunk/web/tools/tools_base.html Added Paths: ----------- trunk/web/tools/filters.html Modified: trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.cpp =================================================================== --- trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.cpp 2013-03-25 22:36:51 UTC (rev 4443) +++ trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.cpp 2013-03-26 22:20:46 UTC (rev 4444) @@ -62,8 +62,8 @@ typedef SpectrumListPtr (*FilterCreator)(const MSData& msd, const string& arg); +typedef const char *UsageInfo[2]; // usage like <int_set>, and details - SpectrumListPtr filterCreator_index(const MSData& msd, const string& arg) { IntegerSet indexSet; @@ -73,8 +73,11 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_IndexSet(indexSet))); } +UsageInfo usage_index = {"<index_value_set>", + "Selects spectra by index - an index value 0-based numerical order in which 
the spectrum appears in the input.\n" + " <index_value_set> is an int_set of indexes." +}; - SpectrumListPtr filterCreator_scanNumber(const MSData& msd, const string& arg) { IntegerSet scanNumberSet; @@ -84,8 +87,11 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_ScanNumberSet(scanNumberSet))); } +UsageInfo usage_scanNumber = {"<scan_numbers>", + "This filter selects spectra by scan number. Depending on the input data type, scan number and spectrum index are not always the same thing - scan numbers are not always contiguous, and are usually 1-based.\n" + "<scan_numbers> is an int_set of scan numbers to be kept." +}; - SpectrumListPtr filterCreator_scanEvent(const MSData& msd, const string& arg) { IntegerSet scanEventSet; @@ -95,8 +101,13 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_ScanEventSet(scanEventSet))); } +UsageInfo usage_scanEvent = {"<scan_event_set>","This filter selects spectra by scan event. For example, to include all scan events except scan event 5, use " + "filter \"scanEvent 1-4 6-\". A \"scan event\" is a preset scan configuration: a user-defined scan configuration that " + "specifies the instrumental settings in which a spectrum is acquired. An instrument may cycle through a list of preset " + "scan configurations to acquire data. This is a more generic term for the Thermo \"scan event\", which is defined in " + "the Thermo Xcalibur glossary as: \"a mass spectrometer scan that is defined by choosing the necessary scan parameter " + "settings. Multiple scan events can be defined for each segment of time.\"."}; - SpectrumListPtr filterCreator_scanTime(const MSData& msd, const string& arg) { double scanTimeLow = 0; @@ -107,21 +118,27 @@ iss >> open >> scanTimeLow >> comma >> scanTimeHigh >> close; if (open!='[' || comma!=',' || close!=']') + { + cerr << "scanTime filter argument does not have form \"[\"<startTime>,<endTime>\"]\", ignored." 
<< endl; return SpectrumListPtr(); + } return SpectrumListPtr(new SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_ScanTimeRange(scanTimeLow, scanTimeHigh))); } +UsageInfo usage_scanTime = {"<scan_time_range>", + "This filter selects only spectra within a given time range.\n" + " <scan_time_range> is a time range, specified in seconds. For example, to select only spectra within the " + "second minute of the run, use \"scanTime [60,119.99]\"."}; - SpectrumListPtr filterCreator_sortScanTime(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_Sorter(msd.run.spectrumListPtr, SpectrumList_SorterPredicate_ScanStartTime())); } +UsageInfo usage_sortScanTime = {"","This filter reorders spectra, sorting them by ascending scan start time."}; - SpectrumListPtr filterCreator_nativeCentroid(const MSData& msd, const string& arg) { istringstream parser(arg); @@ -141,6 +158,11 @@ preferVendor, msLevelsToCentroid)); } +UsageInfo usage_nativeCentroid = {"<prefer_vendor> <ms_levels>","This filter performs centroiding on spectra with the " + "selected <ms_levels>, expressed as an int_set. The value for <prefer_vendor> must be \"True\" or \"False\": when " + "True, vendor (Windows DLL) code is used if available. IMPORTANT NOTE: since this filter operates on the raw " + "data through the vendor DLLs, IT MUST BE THE FIRST FILTER IN ANY LIST OF FILTERS." +}; /** * Handler for --filter zeroSamples removeExtra|addMissing[=FlankingZeroCount] [mslevels] @@ -171,6 +193,23 @@ bRemover ? 
SpectrumList_ZeroSamplesFilter::Mode_RemoveExtraZeros : SpectrumList_ZeroSamplesFilter::Mode_AddMissingZeros, FlankingZeroCount)); } +UsageInfo usage_zeroSamples = {"<mode> [<MS_levels>]", + "This filter deals with zero values in spectra - either removing them, or adding them where they are missing.\n" + " <mode> is either removeExtra or addMissing[=<flankingZeroCount>] .\n" + " <MS_levels> is optional, when provided (as an int_set) the filter is applied only to spectra with those MS levels.\n" + "When <mode> is \"removeExtra\", consecutive zero intensity peaks are removed from spectra. For example, a peak list\n" + " \"100.1,1000 100.2,0 100.3,0 100.4,0 100.5,0 100.6,1030\"\n" + "would become \n" + " \"100.1,1000 100.2,0 100.5,0 100.6,1030\"\n" + "and a peak list \n" + " \"100.1,0 100.2,0 100.3,0 100.4,0 100.5,0 100.6,1030 100.7,0 100.8,1020 100.9,0 101.0,0\"\n" + "would become \n" + " \"100.5,0 100.6,1030 100.7,0 100.8,1020 100.9,0\"\n" + "When <mode> is \"addMissing\", each spectrum's sample rate is automatically determined (the rate can change but only " + "gradually) and flanking zeros are inserted around non-zero data points. The optional [=<flankingZeroCount>] value " + "can be used to limit the number of flanking zeros, otherwise the spectrum is completely populated between nonzero points. " + "For example, to make sure spectra have at least 5 flanking zeros around runs on nonzero points, use filter \"addMissing=5\"." +}; /** * Handler for --filter "ETDFilter". There are five optional arguments for this filter: @@ -192,9 +231,9 @@ string removeNeutralLoss; parser >> removeNeutralLoss; bool bRemNeutralLoss = removeNeutralLoss == "false" || removeNeutralLoss == "0" ? false : true; - string useBlanketFiltering; - parser >> useBlanketFiltering; - bool bUseBlanketFiltering = useBlanketFiltering == "false" || useBlanketFiltering == "0" ? 
false : true; + string useBlanketFiltering; + parser >> useBlanketFiltering; + bool bUseBlanketFiltering = useBlanketFiltering == "false" || useBlanketFiltering == "0" ? false : true; MZTolerance mzt(3.1); if (parser.good()) @@ -217,6 +256,13 @@ SpectrumList_PeakFilter(msd.run.spectrumListPtr, filter)); } +UsageInfo usage_ETDFilter = {"[<removePrecursor> [<removeChargeReduced> [<removeNeutralLoss> [<blanketRemoval> [<matchingTolerance> ]]]]]", + "Filters ETD MSn spectrum data points, removing unreacted precursors, charge-reduced precursors, and neutral losses.\n" + " <removePrecursor> - specify \"true\" to remove unreacted precursor (default is \"false\")\n" + " <removeChargeReduced> - specify \"true\" to remove charge reduced precursor (default is \"false\")\n" + " <removeNeutralLoss> - specify \"true\" to remove neutral loss species from charge reduced precursor (default is \"false\")\n" + " <matchingTolerance> - specify matching tolerance in MZ or PPM (examples: \"3.1 MZ\" (the default) or \"2.2 PPM\")" +}; SpectrumListPtr filterCreator_MS2Denoise(const MSData& msd, const string& arg) { @@ -242,6 +288,12 @@ SpectrumList_PeakFilter(msd.run.spectrumListPtr, filter)); } +UsageInfo usage_MS2Denoise = {"[<peaks_in_window> [<window_width_Da> [multicharge_fragment_relaxation]]]", + "A moving window filter for MS2.\n" + " <peaks_in_window> - the number peaks to select in window, default is 6.\n" + " <window_width_Da> - the width of the window in Da, default is 30.\n" + " <multicharge_fragment_relaxation> - if \"true\" (the default), allows more data below multiply charged precursors." 
+}; SpectrumListPtr filterCreator_MS2Deisotope(const MSData& msd, const string& arg) { @@ -268,9 +320,15 @@ SpectrumList_PeakFilter(msd.run.spectrumListPtr, filter)); } +UsageInfo usage_MS2Deisotope = { "[<hi_res> [<mz_tolerance>]]", + "Deisotopes ms2 spectra using Markey method.\n" + " <hi_res> sets high resolution mode to \"false\" (the default) or \"true\".\n" + " <mz_tolerance> sets the mz tolerance. It defaults to .01 in high resolution mode, otherwise it defaults to 0.5." + }; struct StripIonTrapSurveyScans : public SpectrumList_Filter::Predicate { + virtual boost::logic::tribool accept(const SpectrumIdentity& spectrumIdentity) const { return boost::logic::indeterminate; // need full Spectrum @@ -290,24 +348,30 @@ } }; - SpectrumListPtr filterCreator_stripIT(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_Filter(msd.run.spectrumListPtr, StripIonTrapSurveyScans())); } +UsageInfo usage_stripIT={"","This filter rejects ion trap data spectra with MS level 1."}; SpectrumListPtr filterCreator_precursorRecalculation(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_PrecursorRecalculator(msd)); } +UsageInfo usage_precursorRecalculation = {"","This filter recalculates the precursor m/z and charge for MS2 spectra. " + "It looks at the prior MS1 scan to better infer the parent mass. However, it only works on orbitrap and FT data, " + "although it does not use any 3rd party (vendor DLL) code. Since the time the code was written, Thermo has since fixed " + "up its own estimation in response, so it's less critical than it used to be (though can still be useful)."}; - SpectrumListPtr filterCreator_precursorRefine(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_PrecursorRefine(msd)); } +UsageInfo usage_precursorRefine = {"", "This filter recalculates the precursor m/z and charge for MS2 spectra. " + "It looks at the prior MS1 scan to better infer the parent mass. 
It only works on orbitrap, FT, and TOF data. " + "It does not use any 3rd party (vendor DLL) code."}; SpectrumListPtr filterCreator_mzWindow(const MSData& msd, const string& arg) { @@ -319,35 +383,50 @@ iss >> open >> mzLow >> comma >> mzHigh >> close; if (open!='[' || comma!=',' || close!=']') + { + cerr << "mzWindow filter expected an mzrange formatted something like \"[123.4,567.8]\"" << endl; return SpectrumListPtr(); + } return SpectrumListPtr(new SpectrumList_MZWindow(msd.run.spectrumListPtr, mzLow, mzHigh)); } +UsageInfo usage_mzWindow = {"<mzrange>", + "keeps mz/intensity pairs whose m/z values fall within the specified range.\n" + " <mzrange> is formatted as [mzLow,mzHigh]. For example, in msconvert to retain data in the m/z range " + "100.1 to 307.5, use --filter \"mzWindow [100.1,307.5]\" ." +}; SpectrumListPtr filterCreator_mzPrecursors(const MSData& msd, const string& arg) { char open='\0', comma='\0', close='\0'; - std::set<double> setMz; + std::set<double> setMz; istringstream iss(arg); iss >> open; - while (isdigit(iss.peek())) - { - double mz = 0; - iss >> mz; - setMz.insert(mz); - if (iss.peek() == ',') - iss >> comma; - } - iss >> close; + while (isdigit(iss.peek())) + { + double mz = 0; + iss >> mz; + setMz.insert(mz); + if (iss.peek() == ',') + iss >> comma; + } + iss >> close; if (open!='[' || close!=']') + { + cerr << "mzPrecursors filter expected a list of m/z values formatted something like \"[123.4,567.8,789.0]\"" << endl; return SpectrumListPtr(); - + } return SpectrumListPtr(new - SpectrumList_Filter(msd.run.spectrumListPtr, - SpectrumList_FilterPredicate_PrecursorMzSet(setMz))); + SpectrumList_Filter(msd.run.spectrumListPtr, + SpectrumList_FilterPredicate_PrecursorMzSet(setMz))); } +UsageInfo usage_mzPrecursors = {"<precursor_mz_list>", + "Retains spectra with precursor m/z values found in the <precursor_mz_list>. 
For example, in msconvert to retain " + "only spectra with precursor m/z values of 123.4 and 567.8 you would use --filter \"mzPrecursors [123.4,567.8]\". " + "Note that this filter will drop MS1 scans unless you include 0.0 in the list of precursor values." + }; SpectrumListPtr filterCreator_msLevel(const MSData& msd, const string& arg) { @@ -358,7 +437,10 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_MSLevelSet(msLevelSet))); } +UsageInfo usage_msLevel = {"<mslevels>", + "This filter selects only spectra with the indicated <mslevels>, expressed as an int_set."}; + SpectrumListPtr filterCreator_mzPresent(const MSData& msd, const string& arg) { istringstream parser(arg); @@ -422,12 +504,24 @@ inverse = true; if (open!='[' || close!=']') + { + cerr << "mzPresent filter expected a list of mz values like \"[100,200,300.4]\"" << endl ; return SpectrumListPtr(); + } return SpectrumListPtr(new SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_MzPresent(mzt, setMz, ThresholdFilter(byType, threshold, orientation, msLevels), inverse))); } +UsageInfo usage_mzPresent = {"<tolerance> <type> <threshold> <orientation> <mz_list> [<include_or_exclude>]", + "This filter is similar to the \"threshold\" filter, with a few more options.\n" + " <tolerance> is specified as a number and units (PPM or MZ). For example, \"5 PPM\" or \"2.1 MZ\".\n" + " <type>, <threshold>, and <orientation> operate as in the \"threshold\" filter (see above).\n" + " <mz_list> is a list of mz values of the form [mz1,mz2, ... mzn] (for example, \"[100, 300, 405.6]\"). " + "Data points within <tolerance> of any of these values will be kept.\n" + " <include_or_exclude> is optional and has value \"include\" (the default) or \"exclude\". If \"exclude\" is " + "used the filter drops data points that match the various criteria instead of keeping them." 
+}; SpectrumListPtr filterCreator_chargeState(const MSData& msd, const string& arg) { @@ -438,8 +532,10 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_ChargeStateSet(chargeStateSet))); } +UsageInfo usage_chargeState = {"<charge_states>", + "This filter keeps spectra that match the listed charge state(s), expressed as an int_set. Both known/single " + "and possible/multiple charge states are tested. Use 0 to include spectra with no charge state at all."}; - SpectrumListPtr filterCreator_defaultArrayLength(const MSData& msd, const string& arg) { IntegerSet defaultArrayLengthSet; @@ -449,20 +545,52 @@ SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_DefaultArrayLengthSet(defaultArrayLengthSet))); } +UsageInfo usage_defaultArrayLength = { "<peak_count_range>", + "Keeps only spectra with peak counts within <peak_count_range>, expressed as an int_set. (In mzML the peak list " + "length is expressed as \"defaultArrayLength\", hence the name.) For example, to include only spectra with 100 " + "or more peaks, you would use filter \"defaultArrayLength 100-\" ." + }; SpectrumListPtr filterCreator_metadataFixer(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_MetadataFixer(msd.run.spectrumListPtr)); } +UsageInfo usage_metadataFixer={"","This filter is used to add or replace a spectra's TIC/BPI metadata, usually after " + "peakPicking where the change from profile to centroided data may make the TIC and BPI values inconsistent with " + "the revised scan data. The filter traverses the m/z intensity arrays to find the sum and max. For example, in " + "msconvert it can be used as: --filter \"peakPicking true 1-\" --filter metadataFixer. It can also be used without " + "peak picking for some strange results. 
Certainly adding up all the samples of profile data to get the TIC is " + "just wrong, but we do it anyway."}; - SpectrumListPtr filterCreator_titleMaker(const MSData& msd, const string& arg) { return SpectrumListPtr(new SpectrumList_TitleMaker(msd, bal::trim_copy(arg))); } +UsageInfo usage_titleMaker={"<format_string>","This filter adds or replaces spectrum titles according to specified " + "<format_string>. You can use it, for example, to customize the TITLE line in MGF output in msconvert. The following " + "keywords are recognized: \n" + " \"<RunId>\" - prints the spectrum's Run id - for example, \"Data.d\" from \"C:/Agilent/Data.d/AcqData/mspeak.bin\"\n" + " \"<Index>\" - prints the spectrum's index\n" + " \"<Id>\" - prints the spectrum's nativeID\n" + " \"<SourcePath>\" - prints the path of the spectrum's source data\n" + " \"<ScanNumber>\" - if the nativeID can be represented as a single number, prints that number, else index+1\n" + " \"<ActivationType>\" - for the first precursor, prints the spectrum's \"dissociation method\" value\n" + " \"<IsolationMz>\" - for the first precursor, prints the the spectrum's \"isolation target m/z\" value\n" + " \"<PrecursorSpectrumId>\" - prints the nativeID of the spectrum of the first precursor\n" + " \"<SelectedIonMz>\" - prints the m/z value of the first selected ion of the first precursor\n" + " \"<ChargeState>\" - prints the charge state for the first selected ion of the first precursor\n" + " \"<SpectrumType>\" - prints the spectrum type\n" + " \"<ScanStartTimeInSeconds>\" - prints the spectrum's first scan's start time, in seconds\n" + " \"<ScanStartTimeInMinutes>\" - prints the spectrum's first scan's start time, in minutes\n" + " \"<BasePeakMz>\" - prints the spectrum's base peak m/z\n" + " \"<BasePeakIntensity>\" - prints the spectrum's base peak intensity\n" + " \"<TotalIonCurrent>\" - prints the spectrum's total ion current\n" + " \"<MsLevel>\" - prints the spectrum's MS level\n" + "For example, to create a 
TITLE line in msconvert MGF output with the \"name.first_scan.last_scan.charge\" style (eg. \"mydata.145.145.2\"), use\n" + "--filter \"titleMaker <RunId>.<ScanNumber>.<ScanNumber>.<ChargeState>\"" +}; - SpectrumListPtr filterCreator_chargeStatePredictor(const MSData& msd, const string& arg) { istringstream parser(arg); @@ -473,13 +601,21 @@ return SpectrumListPtr(new SpectrumList_ChargeStateCalculator(msd.run.spectrumListPtr, overrideExistingCharge == "false" || overrideExistingCharge == "0" ? false : true, - lexical_cast<int>(maxMultipleCharge), - lexical_cast<int>(minMultipleCharge), - lexical_cast<double>(singleChargeFractionTIC), + maxMultipleCharge!=""?lexical_cast<int>(maxMultipleCharge):3, + minMultipleCharge!=""?lexical_cast<int>(minMultipleCharge):2, + singleChargeFractionTIC!=""?lexical_cast<double>(singleChargeFractionTIC):0.9, makeMS2 == "true" || makeMS2 == "1" ? true : false)); } +UsageInfo usage_chargeStatePredictor = {"[<overrideExistingCharge> [<maxMultipleCharge> [<minMultipleCharge> [<singleChargeFractionTIC> [<algorithmMakeMS2>]]]]]", + "Predicts MSn spectrum precursors to be singly or multiply charged depending on the ratio of intensity above and below the precursor m/z, or optionally using the \"makeMS2\" algorithm\n" + " <overrideExistingCharge> : always override existing charge information (default:\"true\")\n" + " <maxMultipleCharge> (default 3) and <minMultipleCharge> (default 2): range of values to add to the spectrum's existing \"MS_possible_charge_state\" values." + "If these are the same values, the spectrum's MS_possible_charge_state values are removed and replaced with this single value.\n" + " <singleChargeFractionTIC> : is a percentage expressed as a value between 0 and 1 (the default is 0.9, or 90 percent). 
" + "This is the value used as the previously mentioned ratio of intensity above and below the precursor m/z.\n" + " <algorithmMakeMS2> : default is \"false\", when set to \"true\" the \"makeMS2\" algorithm is used instead of the one described above." + }; - /** * filter on the basis of ms2 activation type * @@ -547,8 +683,13 @@ return SpectrumListPtr(new SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_ActivationType(cvIDs, hasNot))); } +UsageInfo usage_activation = { "<precursor_activation_type>", + "Keeps only spectra whose precursors have the specifed activation type. It doesn't affect non-MS spectra, and doesn't " + "affect MS1 spectra. Use it to create output files containing only ETD or CID MSn data where both activation modes " + "have been interleaved within a given input vendor data file (eg: Thermo's Decision Tree acquisition mode).\n" + " <precursor_activation_type> is any one of: ETD CID SA HCD BIRD ECD IRMPD PD PSD PQD SID or SORI." + }; - SpectrumListPtr filterCreator_AnalyzerType(const MSData& msd, const string& arg) { istringstream parser(arg); @@ -575,6 +716,16 @@ SpectrumList_FilterPredicate_AnalyzerType(cvIDs))); } +UsageInfo usage_analyzerTypeOld = { "<analyzer>", + "This is deprecated syntax for filtering by mass analyzer type.\n" + " <analyzer> can be \"FTMS\" or \"ITMS\"." +}; +UsageInfo usage_analyzerType = { "<analyzer>", + "This filter keeps only spectra with the indicated mass analyzer type. 
\n" + " <analyzer> is any one of \"quad\" \"orbi\" \"FT\" \"IT\" or \"TOF\".\n" + "Sometimes people use the terms FT and Orbi interchangeably, which is OK " + "because there are no hybrid FT+Orbi instruments - so this filter does too.\n" +}; SpectrumListPtr filterCreator_thresholdFilter(const MSData& msd, const string& arg) { @@ -611,7 +762,10 @@ else if (byTypeArg == "tic-cutoff") byType = ThresholdFilter::ThresholdingBy_FractionOfTotalIntensityCutoff; else + { + cerr << "unknown ThresholdFilter type " << byTypeArg << endl; return SpectrumListPtr(); + } ThresholdFilter::ThresholdingOrientation orientation; if (orientationArg == "most-intense") @@ -619,11 +773,28 @@ else if (orientationArg == "least-intense") orientation = ThresholdFilter::Orientation_LeastIntense; else + { + cerr << "unknown ThresholdFilter orientation " << orientationArg << endl; return SpectrumListPtr(); + } SpectrumDataFilterPtr filter(new ThresholdFilter(byType, threshold, orientation, msLevels)); return SpectrumListPtr(new SpectrumList_PeakFilter(msd.run.spectrumListPtr, filter)); } +UsageInfo usage_thresholdFilter={"<type> <threshold> <orientation> [<mslevels>]", + "This filter keeps data whose values meet various threshold criteria.\n" + " <type> must be one of:\n" + " count - keep the n=<threshold> [most|least] intense data points, where n is an integer. Any data points with the same intensity as the nth [most|least] intense data point are removed.\n" + " count-after-ties - like \"count\", except that any data points with the same intensity as the nth [most|least] data point are retained.\n" + " absolute - keep data whose absolute intensity is [more|less] than <threshold>\n" + " bpi-relative - keep data whose intensity is [more|less] than <threshold> percent of the base peak intensity. 
Percentage is expressed as a number between 0 and 1, for example 75 percent is \"0.75\".\n" + " tic-relative - keep data whose individual intensities are [more|less] than <threshold> percent of the total ion current for the scan. Again, percentage is expressed as a number between 0 and 1.\n" + " tic-cutoff - keep the [most|least] intense data points up to <threshold> percent of the total ion current. That is, the TIC of the retained points is <threshold> percent (expressed as a number between 0 and 1) of the original TIC.\n" + " <orientation> must be one of:\n" + " most-intense (keep m/z-intensity pairs above the threshold)\n" + " least-intense (keep m/z-intensity pairs below the threshold)\n" + " <mslevels> is an optional int_set of MS levels - if provided, only scans with those MS levels will be filtered, and others left untouched." +}; SpectrumListPtr filterCreator_polarityFilter(const MSData& msd, const string& arg) { @@ -647,47 +818,50 @@ return SpectrumListPtr(new SpectrumList_Filter(msd.run.spectrumListPtr, SpectrumList_FilterPredicate_Polarity(polarity))); } +UsageInfo usage_polarity = { "<polarity>", + "Keeps only spectra with scan of the selected <polarity>.\n" + " <polarity> is any one of \"positive\" \"negative\" \"+\" or \"-\"." 
+}; - struct JumpTableEntry { const char* command; - const char* usage; + UsageInfo &usage; // {const char *usage,const char &details} FilterCreator creator; }; JumpTableEntry jumpTable_[] = { - {"index", "int_set", filterCreator_index}, - {"msLevel", "int_set", filterCreator_msLevel}, - {"chargeState", "int_set (includes both known/single and possible/multiple charge states; use 0 to include spectra with no charge state at all)", filterCreator_chargeState}, - {"precursorRecalculation", " (based on ms1 data)", filterCreator_precursorRecalculation}, - {"precursorRefine", " (based on ms1 data)", filterCreator_precursorRefine}, - {"peakPicking", "prefer_vendor:<true|false> int_set(MS levels)", filterCreator_nativeCentroid}, - {"scanNumber", "int_set", filterCreator_scanNumber}, - {"scanEvent", "int_set", filterCreator_scanEvent}, - {"scanTime", "[scanTimeLow,scanTimeHigh]", filterCreator_scanTime}, - {"sortByScanTime", "(sort by ascending scan start time)", filterCreator_sortScanTime}, - {"stripIT", " (strip ion trap ms1 scans)", filterCreator_stripIT}, - {"metadataFixer", " (add/replace TIC/BPI metadata)", filterCreator_metadataFixer}, - {"titleMaker", " (add/replace spectrum title according to user-specified format string; the following keywords are recognized: <RunId> <Index> <Id> <SourcePath> <ScanNumber> <ActivationType> <IsolationMz> <SelectedIonMz> <ChargeState> <PrecursorSpectrumId> <SpectrumType> <MsLevel> <ScanStartTimeInMinutes> <ScanStartTimeInSeconds> <BasePeakMz> <BasePeakIntensity> <TotalIonCurrent>", filterCreator_titleMaker}, - {"threshold", "<count|count-after-ties|absolute|bpi-relative|tic-relative|tic-cutoff> <threshold> <most-intense|least-intense> [int_set(MS levels)]", filterCreator_thresholdFilter}, - {"mzWindow", "[mzLow,mzHigh]", filterCreator_mzWindow}, - {"mzPrecursors", "[mz1,mz2, ... 
mzn] zero for no precursor m/z", filterCreator_mzPrecursors}, - {"defaultArrayLength", "int_set", filterCreator_defaultArrayLength}, - {"zeroSamples", "<removeExtra|addMissing[=flankingZeroCount]> <MS levels> (remove extra, or add missing, zeros)", filterCreator_ZeroSamples}, - {"mzPresent", "<tolerance> <PPM|MZ> <count|count-after-ties|absolute|bpi-relative|tic-relative|tic-cutoff> <threshold> <most-intense|least-intense> [mz1,mz2, ... mzn] <include|exclude>", filterCreator_mzPresent}, + {"index", usage_index, filterCreator_index}, + {"msLevel", usage_msLevel, filterCreator_msLevel}, + {"chargeState", usage_chargeState, filterCreator_chargeState}, + {"precursorRecalculation", usage_precursorRecalculation, filterCreator_precursorRecalculation}, + {"precursorRefine", usage_precursorRefine, filterCreator_precursorRefine}, + {"peakPicking", usage_nativeCentroid, filterCreator_nativeCentroid}, + {"scanNumber", usage_scanNumber, filterCreator_scanNumber}, + {"scanEvent", usage_scanEvent, filterCreator_scanEvent}, + {"scanTime", usage_scanTime, filterCreator_scanTime}, + {"sortByScanTime",usage_sortScanTime, filterCreator_sortScanTime}, + {"stripIT", usage_stripIT, filterCreator_stripIT}, + {"metadataFixer", usage_metadataFixer, filterCreator_metadataFixer}, + {"titleMaker", usage_titleMaker, filterCreator_titleMaker}, + {"threshold", usage_thresholdFilter, filterCreator_thresholdFilter}, + {"mzWindow", usage_mzWindow, filterCreator_mzWindow}, + {"mzPrecursors", usage_mzPrecursors, filterCreator_mzPrecursors}, + {"defaultArrayLength", usage_defaultArrayLength, filterCreator_defaultArrayLength}, + {"zeroSamples", usage_zeroSamples , filterCreator_ZeroSamples}, + {"mzPresent", usage_mzPresent , filterCreator_mzPresent}, // MSn Spectrum Processing/Filtering - {"MS2Denoise", "moving window filter for MS2: num peaks to select in window:int_val(default 6) window width (Da):val (default 30) multicharge fragment relaxation: <true|false> (default true)", 
filterCreator_MS2Denoise}, - {"MS2Deisotope", "deisotope ms2 spectra using Markey method", filterCreator_MS2Deisotope}, - {"ETDFilter", "removePrecursor:<default:true|false> removeChargeReduced:<default:true|false> removeNeutralLoss:<default:true|false> blanketRemoval:<default:true|false> MatchingTolerance:(val <PPM|MZ>) (default:3.1 MZ)", filterCreator_ETDFilter}, - {"chargeStatePredictor", "overrideExistingCharge:<default:true|false> maxMultipleCharge:<int>(3) minMultipleCharge:<int>(2) singleChargeFractionTIC:<real>(0.9)", filterCreator_chargeStatePredictor}, - {"activation", "<ETD|CID|SA|HCD|BIRD|ECD|IRMPD|PD|PSD|PQD|SID|SORI> (filter by precursor activation type)", filterCreator_ActivationType}, - {"analyzerType", "<FTMS|ITMS> (deprecated syntax for filtering by mass analyzer type)", filterCreator_AnalyzerType}, - {"analyzer", "<quad|orbi|FT|IT|TOF> (filter by mass analyzer type)", filterCreator_AnalyzerType}, - {"polarity", "<positive|negative|+|-> (filter by scan polarity)", filterCreator_polarityFilter} + {"MS2Denoise", usage_MS2Denoise , filterCreator_MS2Denoise}, + {"MS2Deisotope", usage_MS2Deisotope , filterCreator_MS2Deisotope}, + {"ETDFilter", usage_ETDFilter , filterCreator_ETDFilter}, + {"chargeStatePredictor", usage_chargeStatePredictor , filterCreator_chargeStatePredictor}, + {"activation", usage_activation , filterCreator_ActivationType}, + {"analyzer", usage_analyzerType , filterCreator_AnalyzerType}, + {"analyzerType", usage_analyzerTypeOld , filterCreator_AnalyzerType}, + {"polarity", usage_polarity , filterCreator_polarityFilter} }; @@ -724,30 +898,30 @@ if (entry == jumpTableEnd_) { - // possibly a quoted commandline copied to a config file, - // eg filter=\"index [3,7]\" or filter=\"precursorRecalculation\" - string quot; - if (bal::starts_with(command,"\"")) - quot="\""; - else if (bal::starts_with(command,"'")) - quot="\'"; - if (quot.size()) - { - command = command.substr(1); - if (arg.size()) - { - if (bal::ends_with(arg,quot)) - { - arg 
= arg.substr(0,arg.size()-1); - } - } - else if (bal::ends_with(command,quot)) - { - command = command.substr(0,command.size()-1); - } - entry = find_if(jumpTable_, jumpTableEnd_, HasCommand(command)); - } - } + // possibly a quoted commandline copied to a config file, + // eg filter=\"index [3,7]\" or filter=\"precursorRecalculation\" + string quot; + if (bal::starts_with(command,"\"")) + quot="\""; + else if (bal::starts_with(command,"'")) + quot="\'"; + if (quot.size()) + { + command = command.substr(1); + if (arg.size()) + { + if (bal::ends_with(arg,quot)) + { + arg = arg.substr(0,arg.size()-1); + } + } + else if (bal::ends_with(command,quot)) + { + command = command.substr(0,command.size()-1); + } + entry = find_if(jumpTable_, jumpTableEnd_, HasCommand(command)); + } + } if (entry == jumpTableEnd_) { cerr << "[SpectrumListFactory] Ignoring wrapper: " << wrapper << endl; @@ -778,26 +952,67 @@ PWIZ_API_DECL -string SpectrumListFactory::usage() +string SpectrumListFactory::usage(bool detailedHelp,const std::string &morehelp_prompt) { ostringstream oss; + MSData fakemsd; - oss << "\nFilter options:\n\n"; + oss << endl; + oss << "FILTER OPTIONS" << endl; + if (!detailedHelp) + { + oss << morehelp_prompt << endl; + } + else + { + oss << endl; + oss << "Note: \'int_set\' means that a set of integers must be specified, as a list of intervals of the form [a,b] or a[-][b].\n"; + oss << "For example \'[0,3]\' and \'0-3\' both mean \'the set of integers from 0 to 3 inclusive\'.\n"; + oss << "\'1-\' means \'the set of integers from 1 to the largest allowable number\'. \n"; + oss << "\'9\' is also an integer set, equivalent to \'[9,9]\'.\n"; + oss << "\'[0,2] 5-7\' is the set \'0 1 2 5 6 7\'. 
\n"; + } + for (JumpTableEntry* it=jumpTable_; it!=jumpTableEnd_; ++it) - oss << it->command << " " << it->usage << endl; + { + if (detailedHelp) + oss << it->command << " " << it->usage[0] << endl << it->usage[1] << endl << endl ; + else + oss << it->command << " " << it->usage[0] << endl ; + } oss << endl; - oss << "\'int_set\' means that a set of integers must be specified, as a list of intervals of the form [a,b] or a[-][b].\n"; - oss << "For example \'[0,3]\' and \'0-3\' both mean \'the set of integers from 0 to 3 inclusive\'.\n"; - oss << "\'1-\' means \'the set of integers from 1 to the largest allowable number\'. \n"; - oss << "\'9\' is also an integer set, equivalent to \'[9,9]\'.\n"; - oss << "\'[0,2] 5-7\' is the set \'0 1 2 5 6 7\'. \n"; + // tidy up the word wrap + std::string str = oss.str(); + const size_t wrap = 70; // wrap at 70 columns + size_t lastPos = 0; + for (size_t curPos = wrap ; curPos < str.length(); ) + { + std::string::size_type newlinePos = str.rfind( '\n', curPos ); + if( newlinePos == std::string::npos || (newlinePos <= lastPos)) + { // no newline within next wrap chars, add one + std::string::size_type spacePos = str.rfind( ' ', curPos ); + lastPos = curPos; + if( spacePos == std::string::npos ) + { + curPos++; // no spaces, go long + } + else + { + str[ spacePos ] = '\n'; + curPos = spacePos + wrap + 1; + } + } + else + { + lastPos = curPos; + curPos = newlinePos + wrap + 1; + } + } - oss << endl; - - return oss.str(); + return str; } Modified: trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp =================================================================== --- trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp 2013-03-25 22:36:51 UTC (rev 4443) +++ trunk/pwiz/pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp 2013-03-26 22:20:46 UTC (rev 4444) @@ -46,8 +46,8 @@ /// instantiate a list of SpectrumListWrappers static void wrap(msdata::MSData& msd, const std::vector<std::string>& 
wrappers); - /// user-friendly documentation - static std::string usage(); + /// user-friendly documentation, with option of less or more detail + static std::string usage(bool detailedHelp,const std::string &morehelp_prompt); }; Modified: trunk/pwiz/pwiz_tools/commandline/msconvert.cpp =================================================================== --- trunk/pwiz/pwiz_tools/commandline/msconvert.cpp 2013-03-25 22:36:51 UTC (rev 4443) +++ trunk/pwiz/pwiz_tools/commandline/msconvert.cpp 2013-03-26 22:20:46 UTC (rev 4444) @@ -166,6 +166,7 @@ bool noindex = false; bool zlib = false; bool gzip = false; + bool detailedHelp = false; po::options_description od_config("Options"); od_config.add_options() @@ -262,15 +263,40 @@ ("srmAsSpectra", po::value<bool>(&config.srmAsSpectra)->zero_tokens(), ": write selected reaction monitoring as spectra, not chromatograms") + ("help", + po::value<bool>(&detailedHelp)->zero_tokens(), + ": show this message, with extra detail on filter options") ; + // handle positional arguments + + const char* label_args = "args"; + + po::options_description od_args; + od_args.add_options()(label_args, po::value< vector<string> >(), ""); + + po::positional_options_description pod_args; + pod_args.add(label_args, -1); + + po::options_description od_parse; + od_parse.add(od_config).add(od_args); + + // parse command line + + po::variables_map vm; + po::store(po::command_line_parser(argc, (char**)argv). 
+ options(od_parse).positional(pod_args).run(), vm); + po::notify(vm); + + + // append options description to usage string usage << od_config; // extra usage - usage << SpectrumListFactory::usage() << endl; + usage << SpectrumListFactory::usage(detailedHelp,"run this command with --help to see more detail") << endl; usage << "Examples:\n" << endl @@ -342,29 +368,9 @@ << "ProteoWizard Analysis: " << pwiz::analysis::Version::str() << " (" << pwiz::analysis::Version::LastModified() << ")" << endl << "Build date: " << __DATE__ << " " << __TIME__ << endl; - if (argc <= 1) + if ((argc <= 1) || detailedHelp) throw usage_exception(usage.str().c_str()); - // handle positional arguments - - const char* label_args = "args"; - - po::options_description od_args; - od_args.add_options()(label_args, po::value< vector<string> >(), ""); - - po::positional_options_description pod_args; - pod_args.add(label_args, -1); - - po::options_description od_parse; - od_parse.add(od_config).add(od_args); - - // parse command line - - po::variables_map vm; - po::store(po::command_line_parser(argc, (char**)argv). 
- options(od_parse).positional(pod_args).run(), vm); - po::notify(vm); - // parse config file if required if (!configFilename.empty()) Modified: trunk/pwiz/pwiz_tools/common/MSDataAnalyzerApplication.cpp =================================================================== --- trunk/pwiz/pwiz_tools/common/MSDataAnalyzerApplication.cpp 2013-03-25 22:36:51 UTC (rev 4443) +++ trunk/pwiz/pwiz_tools/common/MSDataAnalyzerApplication.cpp 2013-03-26 22:20:46 UTC (rev 4444) @@ -28,7 +28,7 @@ #include "pwiz/data/msdata/MSDataFile.hpp" #include "pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp" #include "pwiz/utility/misc/Filesystem.hpp" -#include "pwiz/utility/misc/Std.hpp" +#include "pwiz/utility/misc/Std.hpp" #include "boost/program_options.hpp" @@ -51,6 +51,7 @@ string filelistFilename; string configFilename; + bool detailedHelp = false; po::options_description od_config(""); od_config.add_options() @@ -68,17 +69,16 @@ ": execute command, e.g --exec \"tic mz=409-412\"") ("filter", po::value< vector<string> >(&filters), - (": add a spectrum list filter, e.g. --filter=\"msLevel [2,3]\"\n" + SpectrumListFactory::usage()).c_str()) + ": add a spectrum list filter, e.g. 
--filter=\"msLevel [2,3]\"") ("verbose,v", po::value<bool>(&verbose)->zero_tokens(), ": print progress messages") + ("help", + po::value<bool>(&detailedHelp)->zero_tokens(), + ": show this message, with extra detail on filter options") ; - // save options description - ostringstream temp; - temp << od_config; - usageOptions = temp.str(); // handle positional arguments @@ -101,8 +101,17 @@ options(od_parse).positional(pod_args).run(), vm); po::notify(vm); - // parse config file if required + // save options description + ostringstream usage; + usage << od_config; + + // extra usage for filters + usage << SpectrumListFactory::usage(detailedHelp,"run this command with --help to see more detail") << endl; + usageOptions = usage.str(); + + // parse config file if required + if (!configFilename.empty()) { ifstream is(configFilename.c_str()); Added: trunk/web/tools/filters.html =================================================================== --- trunk/web/tools/filters.html (rev 0) +++ trunk/web/tools/filters.html 2013-03-26 22:20:46 UTC (rev 4444) @@ -0,0 +1,195 @@ +<html> + +<head> + <title>ProteoWizard Technical Documentation: Filters</title> + <link rel="stylesheet" href="../main.css" type="text/css" media="screen" /> +</head> + +<body> + <h2>Filters as used in msaccess, msconvert, and other ProteoWizard tools</h2> + + <p> +<br/> +<h3>Note:</h3> +Each filter entry below has the form "filtername filterargs", where "filtername" is the literal name of the filter and "filterargs" is a list of arguments that you replace with actual values.<br/> +For example, the "index" filter as described below would be used like this in the msconvert program:<br/> +--filter "index 1-15" +<br/> +<br/> + +Many filters take 'int_set' arguments. 
An int_set is a set of integers written as a list of intervals of the form [a,b] or a[-][b].<br/> +For example '[0,3]' and '0-3' both mean 'the set of integers from 0 to 3 inclusive'.<br/> +'1-' means 'the set of integers from 1 to the largest allowable number'. <br/> +'9' is also an integer set, equivalent to '[9,9]'.<br/> +'[0,2] 5-7' is the set '0 1 2 5 6 7'. <br/> +<br/> + + +<br/> + + +<h3>The Filters</h3> + +<br/> +index <index_value_set><br/> +Selects spectra by index - an index value is the 0-based numerical order in which the spectrum appears in the input.<br/> + <index_value_set> is an int_set of indexes.<br/> +<br/> +msLevel <mslevels><br/> +This filter selects only spectra with the indicated <mslevels>, expressed as an int_set.<br/> +<br/> +chargeState <charge_states><br/> +This filter keeps spectra that match the listed charge state(s), expressed as an int_set. Both known/single and possible/multiple charge states are tested. Use 0 to include spectra with no charge state at all.<br/> +<br/> +precursorRecalculation <br/> +This filter recalculates the precursor m/z and charge for MS2 spectra. It looks at the prior MS1 scan to better infer the parent mass. However, it only works on orbitrap and FT data, although it does not use any 3rd party (vendor DLL) code. Since the code was written, Thermo has fixed up its own estimation in response, so it's less critical than it used to be (though can still be useful).<br/> +<br/> +precursorRefine <br/> +This filter recalculates the precursor m/z and charge for MS2 spectra. It looks at the prior MS1 scan to better infer the parent mass. It only works on orbitrap, FT, and TOF data. It does not use any 3rd party (vendor DLL) code.<br/> +<br/> +peakPicking <prefer_vendor> <ms_levels><br/> +This filter performs centroiding on spectra with the selected <ms_levels>, expressed as an int_set. The value for <prefer_vendor> must be "True" or "False": when True, vendor (Windows DLL) code is used if available. 
IMPORTANT NOTE: since this filter operates on the raw data through the vendor DLLs, IT MUST BE THE FIRST FILTER IN ANY LIST OF FILTERS.<br/> +<br/> +scanNumber <scan_numbers><br/> +This filter selects spectra by scan number. Depending on the input data type, scan number and spectrum index are not always the same thing - scan numbers are not always contiguous, and are usually 1-based.<br/> +<scan_numbers> is an int_set of scan numbers to be kept.<br/> +<br/> +scanEvent <scan_event_set><br/> +This filter selects spectra by scan event. For example, to include all scan events except scan event 5, use filter "scanEvent 1-4 6-". A "scan event" is a preset scan configuration: a user-defined scan configuration that specifies the instrumental settings in which a spectrum is acquired. An instrument may cycle through a list of preset scan configurations to acquire data. This is a more generic term for the Thermo "scan event", which is defined in the Thermo Xcalibur glossary as: "a mass spectrometer scan that is defined by choosing the necessary scan parameter settings. Multiple scan events can be defined for each segment of time.".<br/> +<br/> +scanTime <scan_time_range><br/> +This filter selects only spectra within a given time range.<br/> + <scan_time_range> is a time range, specified in seconds. For example, to select only spectra within the second minute of the run, use "scanTime [60-119.99]".<br/> +<br/> +sortByScanTime <br/> +This filter reorders spectra, sorting them by ascending scan start time.<br/> +<br/> +stripIT <br/> +This filter rejects ion trap data spectra with MS level 1.<br/> +<br/> +metadataFixer <br/> +This filter is used to add or replace a spectrum's TIC/BPI metadata, usually after peakPicking where the change from profile to centroided data may make the TIC and BPI values inconsistent with the revised scan data. The filter traverses the m/z intensity arrays to find the sum and max. 
For example, in msconvert it can be used as: --filter "peakPicking true 1-" --filter metadataFixer. It can also be used without peak picking for some strange results. Certainly adding up all the samples of profile data to get the TIC is just wrong, but we do it anyway.<br/> +<br/> +titleMaker <format_string><br/> +This filter adds or replaces spectrum titles according to the specified <format_string>. You can use it, for example, to customize the TITLE line in MGF output in msconvert. The following keywords are recognized: <br/> + <RunId> - prints the spectrum's Run id - for example, "Data.d" from "C:/Agilent/Data.d/AcqData/mspeak.bin"<br/> + <Index> - prints the spectrum's index<br/> + <Id> - prints the spectrum's nativeID<br/> + <SourcePath> - prints the path of the spectrum's source data<br/> + <ScanNumber> - if the nativeID can be represented as a single number, prints that number, else index+1<br/> + <ActivationType> - for the first precursor, prints the spectrum's "dissociation method" value<br/> + <IsolationMz> - for the first precursor, prints the spectrum's "isolation target m/z" value<br/> + <PrecursorSpectrumId> - prints the nativeID of the spectrum of the first precursor<br/> + <SelectedIonMz> - prints the m/z value of the first selected ion of the first precursor<br/> + <ChargeState> - prints the charge state for the first selected ion of the first precursor<br/> + <SpectrumType> - prints the spectrum type<br/> + <ScanStartTimeInSeconds> - prints the spectrum's first scan's start time, in seconds<br/> + <ScanStartTimeInMinutes> - prints the spectrum's first scan's start time, in minutes<br/> + <BasePeakMz> - prints the spectrum's base peak m/z<br/> + <BasePeakIntensity> - prints the spectrum's base peak intensity<br/> + <TotalIonCurrent> - prints the spectrum's total ion current<br/> + <MsLevel> - prints the spectrum's MS level<br/> +For example, to create a TITLE line in msconvert MGF output with the "name.first_scan.last_scan.charge" style (eg. 
"mydata.145.145.2"), use --filter "titleMaker <RunId>.<ScanNumber>.<ScanNumber>.<ChargeState>"<br/> +<br/> +threshold <type> <threshold> <orientation> [<mslevels>]<br/> +This filter keeps data whose values meet various threshold criteria.<br/> + <type> must be one of:<br/> + count - keep the n=<threshold> [most|least] intense data points, where n is an integer. Any data points with the same intensity as the nth [most|least] intense data point are removed.<br/> + count-after-ties - like "count", except that any data points with the same intensity as the nth [most|least] intense data point are retained.<br/> + absolute - keep data whose absolute intensity is [more|less] than <threshold><br/> + bpi-relative - keep data whose intensity is [more|less] than <threshold> percent of the base peak intensity. Percentage is expressed as a number between 0 and 1, for example 75 percent is "0.75".<br/> + tic-relative - keep data whose individual intensities are [more|less] than <threshold> percent of the total ion current for the scan. Again, percentage is expressed as a number between 0 and 1.<br/> + tic-cutoff - keep the [most|least] intense data points up to <threshold> percent of the total ion current. That is, the TIC of the retained points is <threshold> percent (expressed as a number between 0 and 1) of the original TIC.<br/> + <orientation> must be one of:<br/> + most-intense (keep m/z-intensity pairs above the threshold)<br/> + least-intense (keep m/z-intensity pairs below the threshold)<br/> + <mslevels> is an optional int_set of MS levels - if provided, only scans with those MS levels will be filtered, and others left<br/> +untouched.<br/> +<br/> +mzWindow <mzrange><br/> +keeps mz/intensity pairs whose m/z values fall within the specified range.<br/> + <mzrange> is formatted as [mzLow,mzHigh]. 
For example, in msconvert to retain data in the m/z range 100.1 to 307.5, use --filter "mzWindow [100.1,307.5]" .<br/> +<br/> +mzPrecursors <precursor_mz_list><br/> +Retains spectra with precursor m/z values found in the <precursor_mz_list>. For example, in msconvert to retain only spectra with precursor m/z values of 123.4 and 567.8 you would use --filter "mzPrecursors [123.4,567.8]". Note that this filter will drop MS1 scans unless you include 0.0 in the list of precursor values.<br/> +<br/> +defaultArrayLength <peak_count_range><br/> +Keeps only spectra with peak counts within <peak_count_range>, expressed as an int_set. (In mzML the peak list length is expressed as "defaultArrayLength", hence the name.) For example, to include only spectra with 100 or more peaks, you would use filter "defaultArrayLength 100-" .<br/> +<br/> +zeroSamples <mode> [<MS_levels>]<br/> +This filter deals with zero values in spectra - either removing them, or adding them where they are missing.<br/> + <mode> is either removeExtra or addMissing[=<flankingZeroCount>] .<br/> + <MS_levels> is optional, when provided (as an int_set) the filter is applied only to spectra with those MS levels.<br/> +When <mode> is "removeExtra", consecutive zero intensity peaks are removed from spectra. For example, a peak list<br/> + "100.1,1000 100.2,0 100.3,0 100.4,0 100.5,0 100.6,1030"<br/> +would become <br/> + "100.1,1000 100.2,0 100.5,0 100.6,1030"<br/> +and a peak list <br/> + "100.1,0 100.2,0 100.3,0 100.4,0 100.5,0 100.6,1030 100.7,0 100.8,1020 100.9,0 101.0,0"<br/> +would become <br/> + "100.5,0 100.6,1030 100.7,0 100.8,1020 100.9,0"<br/> +When <mode> is "addMissing", each spectrum's sample rate is automatically determined (the rate can change but only gradually) and flanking zeros are inserted around non-zero data points. The optional [=<flankingZeroCount>] value can be used to limit the number of flanking zeros, otherwise the spectrum is completely populated between nonzero points. 
For example, to make sure spectra have at least 5 flanking zeros around runs of nonzero points, use filter "zeroSamples addMissing=5".<br/> +<br/> +mzPresent <tolerance> <type> <threshold> <orientation> <mz_list> [<include_or_exclude>]<br/> +This filter is similar to the "threshold" filter, with a few more options.<br/> + <tolerance> is specified as a number and units (PPM or MZ). For example, "5 PPM" or "2.1 MZ".<br/> + <type>, <threshold>, and <orientation> operate as in the "threshold" filter (see above).<br/> + <mz_list> is a list of mz values of the form [mz1,mz2, ... mzn] (for example, "[100, 300, 405.6]"). Data points within <tolerance> of any of these values will be kept.<br/> + <include_or_exclude> is optional and has value "include" (the default) or "exclude". If "exclude" is used the filter drops data points that match the various criteria instead of keeping them.<br/> +<br/> +MS2Denoise [<peaks_in_window> [<window_width_Da> [<multicharge_fragment_relaxation>]]]<br/> +A moving window filter for MS2.<br/> + <peaks_in_window> - the number of peaks to select in the window, default is 6.<br/> + <window_width_Da> - the width of the window in Da, default is 30.<br/> + <multicharge_fragment_relaxation> - if "true" (the default), allows more data below multiply charged precursors.<br/> +<br/> +MS2Deisotope [<hi_res> [<mz_tolerance>]]<br/> +Deisotopes ms2 spectra using Markey method.<br/> + <hi_res> sets high resolution mode to "false" (the default) or "true".<br/> + <mz_tolerance> sets the mz tolerance. 
It defaults to .01 in high resolution mode, otherwise it defaults to 0.5.<br/> +<br/> +ETDFilter [<removePrecursor> [<removeChargeReduced> [<removeNeutralLoss> [<blanketRemoval> [<matchingTolerance> ]]]]]<br/> +Filters ETD MSn spectrum data points, removing unreacted precursors, charge-reduced precursors, and neutral losses.<br/> + <removePrecursor> - specify "true" to remove unreacted precursor (default is "false")<br/> + <removeChargeReduced> - specify "true" to remove charge reduced precursor (default is "false")<br/> + <removeNeutralLoss> - specify "true" to remove neutral loss species from charge reduced precursor (default is "false")<br/> + <matchingTolerance> - specify matching tolerance in MZ or PPM (examples: "3.1 MZ" (the default) or "2.2 PPM")<br/> +<br/> +chargeStatePredictor [<overrideExistingCharge> [<maxMultipleCharge><br/> +[<minMultipleCharge> [<singleChargeFractionTIC><br/> +[<algorithmMakeMS2>]]]]]<br/> +Predicts MSn spectrum precursors to be singly or multiply charged depending on the ratio of intensity above and below the precursor m/z, or optionally using the "makeMS2" algorithm<br/> + <overrideExistingCharge> : always override existing charge information (default:"true")<br/> + <maxMultipleCharge> (default 3) and <minMultipleCharge> (default 2): range of values to add to the spectrum's existing "MS_possible_charge_state" values. If these are the same values, the spectrum's MS_possible_charge_state values are removed and replaced with this single value.<br/> + <singleChargeFractionTIC> : is a percentage expressed as a value between 0 and 1 (the default is 0.9, or 90 percent). This is the value used as the previously mentioned ratio of intensity above and below the precursor m/z.<br/> + <algorithmMakeMS2> : default is "false", when set to "true" the "makeMS2" algorithm is used instead of the one described above.<br/> +<br/> +activation <precursor_activation_type><br/> +Keeps only spectra whose precursors have the specified activation type. 
It doesn't affect non-MS spectra, and doesn't affect MS1 spectra. Use it to create output files containing only ETD or CID MSn data where both activation modes have been interleaved within a given input vendor data file (eg: Thermo's Decision Tree acquisition mode).<br/> + <precursor_activation_type> is any one of: ETD CID SA HCD BIRD ECD IRMPD PD PSD PQD SID or SORI.<br/> +<br/> +analyzer <analyzer><br/> +This filter keeps only spectra with the indicated mass analyzer type. <br/> + <analyzer> is any one of "quad" "orbi" "FT" "IT" or "TOF".<br/> +Sometimes people use the terms FT and Orbi interchangeably, which is OK because there are no hybrid FT+Orbi instruments - so this filter does too.<br/> +<br/> +<br/> +analyzerType <analyzer><br/> +This is deprecated syntax for filtering by mass analyzer type.<br/> + <analyzer> can be "FTMS" or "ITMS".<br/> +<br/> +polarity <polarity><br/> +Keeps only spectra with scan of the selected <polarity>.<br/> + <polarity> is any one of "positive" "negative" "+" or "-".<br/> + + + + + </p> + + <hr/> + + +</body> +</html> + Modified: trunk/web/tools/msaccess.html =================================================================== --- trunk/web/tools/msaccess.html 2013-03-25 22:36:51 UTC (rev 4443) +++ trunk/web/tools/msaccess.html 2013-03-26 22:20:46 UTC (rev 4444) @@ -11,7 +11,7 @@ <i>msaccess</i> is a command line tool for extracting data and metadata from data files. 
<p> - Examples (see below for many more options): + Examples (see below for many more options, see <a href="../tools/filters.html">here</a> for details on the --filter option): <ul> <li><code>msaccess data.mzML -x spectrum_table</code><br/> (creates data.mzML.spectrum_table.txt with summary information for all spectra as read from the scan headers)<p/></li> @@ -50,69 +50,7 @@ -c [ --config ] arg : configuration file (containing settings as optionName=value)<br /> -x [ --exec ] arg : execute command, e.g --exec "tic mz=409-412"<br /> --filter arg : add a spectrum list filter, e.g. --filter="msLevel [2,3]"<br /> - <br /> - Filter options:<br /> - <br /> - index int_set<br /> - msLevel int_set<br /> - precursorRecalculation (based on ms1 data)<... [truncated message content] |