Skip to content

Additional OpenSwath Speedups (IsotopeCacher + LinearResampler) #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: feature/oswSpeedUp
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#pragma once

#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h>
#include <OpenMS/CHEMISTRY/AASequence.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/DataStructures.h>

Expand Down Expand Up @@ -168,7 +169,7 @@ namespace OpenMS
/// Old + new peaks are pushed to @p isotopeMasses
OPENMS_DLLAPI void addSinglePeakIsotopes2Spec(double mz, double ity,
std::vector<std::pair<double, double> >& isotope_masses, //[out]
Size nr_isotopes, int charge);
Size nr_isotopes, int charge, const OpenSwathIsotopeGeneratorCacher& isotopeCacher);

/// sorts vector of pairs by first
OPENMS_DLLAPI void sortByFirst(std::vector<std::pair<double, double> >& tmp);
Expand Down
7 changes: 5 additions & 2 deletions src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#include <OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h>

#include <OpenMS/DATASTRUCTURES/DefaultParamHandler.h>
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h>


namespace OpenMS
{
Expand Down Expand Up @@ -84,15 +86,16 @@ namespace OpenMS
double& dotprod,
double& manhattan,
double drift_start,
double drift_end) const;
double drift_end,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;

/**
@brief Compute manhattan and dotprod score for all spectra which can be accessed by
the SpectrumAccessPtr for all transitions groups in the LightTargetedExperiment.
*/
void operator()(const OpenSwath::SpectrumAccessPtr& swath_ptr,
OpenSwath::LightTargetedExperiment& transition_exp_used,
OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end) const;
OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;
};


Expand Down
15 changes: 10 additions & 5 deletions src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
#include <OpenMS/OPENSWATHALGO/DATAACCESS/ITransition.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h>


#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h>

namespace OpenMS
{
class TheoreticalSpectrumGenerator;
Expand Down Expand Up @@ -113,7 +116,8 @@ namespace OpenMS
double& isotope_corr,
double& isotope_overlap,
double drift_lower,
double drift_upper) const;
double drift_upper,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;

/// Massdiff scores, see class description
void dia_massdiff_score(const std::vector<TransitionType>& transitions,
Expand All @@ -138,7 +142,7 @@ namespace OpenMS

/// Precursor isotope scores for precursors (peptides and metabolites)
void dia_ms1_isotope_scores_averagine(double precursor_mz, const std::vector<SpectrumPtrType>& spectrum,
double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end) const;
double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;
void dia_ms1_isotope_scores(double precursor_mz, const std::vector<SpectrumPtrType>& spectrum,
double& isotope_corr, double& isotope_overlap, const EmpiricalFormula& sum_formula, double drift_start, double drift_end) const;

Expand All @@ -153,7 +157,8 @@ namespace OpenMS
double& dotprod,
double& manhattan,
double drift_start,
double drift_end) const;
double drift_end,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;
//@}

private:
Expand All @@ -172,7 +177,7 @@ namespace OpenMS
const std::vector<SpectrumPtrType>& spectrum,
std::map<std::string, double>& intensities,
double& isotope_corr,
double& isotope_overlap, double drift_start, double drift_end) const;
double& isotope_overlap, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;

/// retrieves intensities from MRMFeature
/// computes a vector of relative intensities for each feature (output to intensities)
Expand Down Expand Up @@ -210,7 +215,7 @@ namespace OpenMS
*/
double scoreIsotopePattern_(const std::vector<double>& isotopes_int,
double product_mz,
int putative_fragment_charge) const;
int putative_fragment_charge, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;

/**
@brief Compare an experimental isotope pattern to a theoretical one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
#define USE_SP_INTERFACE

// Actual scoring
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h>

#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/EmgScoring.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h>
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h>
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h>
#include <OpenMS/ANALYSIS/OPENSWATH/SONARScoring.h>
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/EmgScoring.h>

// Kernel classes
#include <OpenMS/KERNEL/StandardTypes.h>
Expand Down Expand Up @@ -132,7 +132,8 @@ namespace OpenMS
FeatureMap& output,
const TargetedExperiment& transition_exp,
const TransformationDescription& trafo,
const PeakMap& swath_map);
const PeakMap& swath_map,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher);

/** @brief Pick and score features in a single experiment from chromatograms
*
Expand All @@ -153,7 +154,8 @@ namespace OpenMS
const OpenSwath::LightTargetedExperiment& transition_exp,
const TransformationDescription& trafo,
const std::vector<OpenSwath::SwathMap>& swath_maps,
TransitionGroupMapType& transition_group_map);
TransitionGroupMapType& transition_group_map,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher);

/** @brief Prepares the internal mappings of peptides and proteins.
*
Expand Down Expand Up @@ -186,6 +188,7 @@ namespace OpenMS
const TransformationDescription & trafo,
const std::vector<OpenSwath::SwathMap>& swath_maps,
FeatureMap& output,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher,
bool ms1only = false) const;

/** @brief Set the flag for strict mapping
Expand Down Expand Up @@ -268,15 +271,18 @@ namespace OpenMS
* @param swath_maps Optional SWATH-MS (DIA) map corresponding from which
* the chromatograms were extracted. Use empty map if no
* data is available.
* @param OpenSwathIsotopeGeneratorCacher cache containing precomputed isotope distributions
* @return a struct of type OpenSwath_Ind_Scores containing either target or decoy values
*
*/
OpenSwath_Ind_Scores scoreIdentification_(MRMTransitionGroupType& transition_group_identification,
OpenSwathScoring& scorer,
const size_t feature_idx,
const std::vector<std::string> & native_ids_detection,
const double det_intensity_ratio_score,
const double det_mi_ratio_score,
const std::vector<OpenSwath::SwathMap>& swath_maps) const;
const std::vector<OpenSwath::SwathMap>& swath_maps,
const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const;

void prepareFeatureOutput_(OpenMS::MRMFeature& mrmfeature, bool ms1only, int charge) const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ namespace OpenMS
String native_id = chromatogram.getNativeID();

// only pick detecting transitions (skip all others)
if (transition_group.getTransitions().size() > 0 &&
transition_group.hasTransition(native_id) &&
if (transition_group.getTransitions().size() > 0 &&
transition_group.hasTransition(native_id) &&
!transition_group.getTransition(native_id).isDetectingTransition() )
{
continue;
Expand Down Expand Up @@ -209,7 +209,7 @@ namespace OpenMS
bool skip = false;
for (Size j = 0; j < i; j++)
{
if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") &&
if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") &&
(double)mrm_feature.getMetaValue("rightWidth") <= (double)features[j].getMetaValue("rightWidth") )
{ skip = true; }
}
Expand Down Expand Up @@ -274,7 +274,7 @@ namespace OpenMS
// Check for minimal peak width -> return empty feature (Intensity zero)
if (use_consensus_)
{
if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_)
if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_)
{
return mrmFeature;
}
Expand All @@ -283,7 +283,7 @@ namespace OpenMS
{
String outlier = "none";
double qual = computeQuality_(transition_group, picked_chroms, chr_idx, best_left, best_right, outlier);
if (qual < min_qual_)
if (qual < min_qual_)
{
return mrmFeature;
}
Expand Down Expand Up @@ -335,13 +335,13 @@ namespace OpenMS
return mrmFeature;
}

/**
/**

@brief Apex-based peak picking

Pick the peak with the closest apex to the consensus apex for each
chromatogram. Use the closest peak for the current peak.
chromatogram. Use the closest peak for the current peak.

Note that we will only set the closest peak per chromatogram to zero, so
if there are two peaks for some transitions, we will have to get to them
later. If there is no peak, then we transfer transition boundaries from
Expand All @@ -350,7 +350,7 @@ namespace OpenMS
template <typename SpectrumT>
void pickApex(std::vector<SpectrumT>& picked_chroms,
const double best_left, const double best_right, const double peak_apex,
double &min_left, double &max_right,
double &min_left, double &max_right,
std::vector< double > & left_edges, std::vector< double > & right_edges)
{
for (Size k = 0; k < picked_chroms.size(); k++)
Expand All @@ -361,16 +361,16 @@ namespace OpenMS
{
PeakIntegrator::PeakArea pa_tmp = pi_.integratePeak( // get the peak apex
picked_chroms[k],
picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_LEFTBORDER][i],
picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_RIGHTBORDER][i]);
picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_LEFTBORDER][i],
picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_RIGHTBORDER][i]);
if (pa_tmp.apex_pos > 0.0 && std::fabs(pa_tmp.apex_pos - peak_apex) < peak_apex_dist_min)
{ // update best candidate
peak_apex_dist_min = std::fabs(pa_tmp.apex_pos - peak_apex);
min_dist = (int)i;
}
}

// Select master peak boundaries, or in the case we found at least one peak, the local peak boundaries
// Select master peak boundaries, or in the case we found at least one peak, the local peak boundaries
double l = best_left;
double r = best_right;
if (min_dist >= 0)
Expand Down Expand Up @@ -424,7 +424,7 @@ namespace OpenMS
local_right = right_edges[k];
}

const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID());
const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID());
if (transition_group.getTransitions()[k].isDetectingTransition())
{
for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++)
Expand All @@ -434,7 +434,7 @@ namespace OpenMS
}

// Compute total intensity on transition-level
double transition_total_xic = 0;
double transition_total_xic = 0;

for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++)
{
Expand Down Expand Up @@ -486,7 +486,7 @@ namespace OpenMS
}
else if (peak_integration_ == "smoothed")
{
if (smoothed_chroms.size() <= k)
if (smoothed_chroms.size() <= k)
{
throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Tried to calculate peak area and height without any smoothed chromatograms");
Expand All @@ -497,7 +497,7 @@ namespace OpenMS
{
throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
String("Peak integration chromatogram ") + peak_integration_ + " is not a valid method for MRMTransitionGroupPicker");
}
}

Feature f;
double quality = 0;
Expand Down Expand Up @@ -838,7 +838,7 @@ namespace OpenMS
for (Size k = 0; k < picked_chroms.size(); k++)
{
const SpectrumT& chromatogram = selectChromHelper_(transition_group, picked_chroms[k].getNativeID());
const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram,
const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram,
master_peak_container, best_left - resample_boundary, best_right + resample_boundary);

std::vector<double> int_here;
Expand Down Expand Up @@ -955,7 +955,7 @@ namespace OpenMS

double score = shape_score - coel_score - 1.0 * missing_peaks / picked_chroms.size();

OPENMS_LOG_DEBUG << " Computed score " << score << " (from " << shape_score <<
OPENMS_LOG_DEBUG << " Computed score " << score << " (from " << shape_score <<
" - " << coel_score << " - " << 1.0 * missing_peaks / picked_chroms.size() << ")" << std::endl;

return score;
Expand All @@ -973,7 +973,7 @@ namespace OpenMS
template <typename SpectrumT>
void recalculatePeakBorders_(const std::vector<SpectrumT>& picked_chroms, double& best_left, double& best_right, double max_z)
{
// 1. Collect all seeds that lie within the current seed
// 1. Collect all seeds that lie within the current seed
// - Per chromatogram only the most intense one counts, otherwise very
// - low intense peaks can contribute disproportionally to the voting
// - procedure.
Expand Down Expand Up @@ -1013,7 +1013,7 @@ namespace OpenMS
return;
}

// FEATURE IDEA: instead of Z-score use modified Z-score for small data sets
// FEATURE IDEA: instead of Z-score use modified Z-score for small data sets
// http://d-scholarship.pitt.edu/7948/1/Seo.pdf
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
// 1. calculate median
Expand All @@ -1031,8 +1031,8 @@ namespace OpenMS
/ right_borders.size() - mean * mean);
std::sort(right_borders.begin(), right_borders.end());

OPENMS_LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best "
<< best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev
OPENMS_LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best "
<< best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev
<< " coefficient of variation" << std::endl;

// Compare right borders of best transition with the mean
Expand All @@ -1048,8 +1048,8 @@ namespace OpenMS
/ left_borders.size() - mean * mean);
std::sort(left_borders.begin(), left_borders.end());

OPENMS_LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best "
<< best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev
OPENMS_LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best "
<< best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev
<< " coefficient of variation" << std::endl;

// Compare left borders of best transition with the mean
Expand Down Expand Up @@ -1130,8 +1130,7 @@ namespace OpenMS
if (end != chromatogram.end()) {end++;}

SpectrumT resampled_peak_container = master_peak_container; // copy the master container, which contains the RT values
LinearResamplerAlign lresampler;
lresampler.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end());
lresampler_.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end());

return resampled_peak_container;
}
Expand Down Expand Up @@ -1164,6 +1163,7 @@ namespace OpenMS

PeakPickerMRM picker_;
PeakIntegrator pi_;
LinearResamplerAlign lresampler_;
};
}

Expand Down
Loading