Logo Search packages:      
Sourcecode: massxpert version File versions

pkaPhPi.cpp

/* massXpert - the true massist's program.
   --------------------------------------
   Copyright(C) 2006,2007 Filippo Rusconi

   http://www.massxpert.org/massXpert

   This file is part of the massXpert project.

   The massxpert project is the successor to the "GNU polyxmass"
   project that is an official GNU project package(see
   www.gnu.org). The massXpert project is not endorsed by the GNU
   project, although it is released ---in its entirety--- under the
   GNU General Public License. A huge part of the code in massXpert
   is actually a C++ rewrite of code in GNU polyxmass. As such
   massXpert was started at the Centre National de la Recherche
   Scientifique(FRANCE), that granted me the formal authorization to
   publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License version 3, as published by the Free Software Foundation.
   

   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this software; if not, write to the

   Free Software Foundation, Inc.,

   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/


/////////////////////// Std includes
#include <cmath>


/////////////////////// Local includes
#include "pkaPhPi.hpp" 


namespace massXpert
{

  PkaPhPi::PkaPhPi(Polymer &polymer, 
                CalcOptions &calcOptions,
                QList<Monomer *> *monomerList,
                QList<Modif *> *modifList)
    : m_polymer(polymer), 
      m_calcOptions(calcOptions),
      mpa_monomerList(monomerList),
      mpa_modifList(modifList)
  {
    m_ph = 7;
    m_pi = 7;
  }


  PkaPhPi::~PkaPhPi()
  {
    if (mpa_monomerList)
      {
      while(!mpa_monomerList->isEmpty())
        delete mpa_monomerList->takeFirst();  
      
      delete mpa_monomerList;
      mpa_monomerList = 0;
      }
  
    if (mpa_modifList)
      {
      while(!mpa_modifList->isEmpty())
        delete mpa_modifList->takeFirst();
      
      delete mpa_modifList;
      
      mpa_modifList = 0;
      }
  
  }


  void  
  PkaPhPi::setPh(double ph)
  {
    Q_ASSERT(ph > 0 && ph < 14);
  
    m_ph = ph;
  }


  double 
  PkaPhPi::ph()
  {
    return m_ph;
  }


  double 
  PkaPhPi::pi()
  {
    return m_pi;
  }


  double 
  PkaPhPi::positiveCharges()
  {
    return m_positiveCharges;
  }


  double 
  PkaPhPi::negativeCharges()
  {
    return m_negativeCharges;
  }


  void
  PkaPhPi::setCalcOptions(const CalcOptions &calcOptions)
  {
    m_calcOptions = calcOptions;
  }


  void 
  PkaPhPi::setMonomerList(QList<Monomer *> *monomerList)
  {
    Q_ASSERT(monomerList);
  
    mpa_monomerList = monomerList;
  }


  void 
  PkaPhPi::setModifList(QList<Modif *> *modifList)
  {
    Q_ASSERT(modifList);
  
    mpa_modifList = modifList;  
  }


  int
  PkaPhPi::calculateCharges()
  {
    int processedChemicalGroups = 0;
  
    m_positiveCharges = 0;
    m_negativeCharges = 0;
  
    // We of course need monomers ! Instead, we may not need modifs.
    if (!mpa_monomerList)
      return -1;
  
    int polymerSize = m_polymer.size();
    
    // The general scheme is :

    // Get the list of the coordinates of the different region
    // selections. For each first monomer and end monomer of a given
    // region selection, check if the the region is an oligomer or a
    // residual chain(m_selectionType of CalcOptions); act
    // accordingly. Also, check for each selection region if it
    // encompasses the polymer left/right end. If the left/right end
    // modifications are to be taken into account, act accordingly.

    const CoordinateList &coordinateList = m_calcOptions.coordinateList();
         
    for (int iter = 0; iter < coordinateList.size(); ++iter)
      {
      Coordinates *coordinates = coordinateList.at(iter);
      
      int startIndex = coordinates->start();
      int endIndex = coordinates->end();

      bool leftMostCoordinates = 
        coordinateList.isLeftMostCoordinates(coordinates);
      bool rightMostCoordinates = 
        coordinateList.isRightMostCoordinates(coordinates);
      
      for(int jter = startIndex; jter < endIndex + 1; ++jter)
        {
          const Monomer *seqMonomer = m_polymer.at(jter);
            
//        qDebug() << __FILE__ << __LINE__
//                << "-- Monomer:" << seqMonomer->name() 
//                << "position:" << jter + 1;
          
          // Find a monomer by the same code in our list of monomers
          // that have been fed with chemical group data. Note that
          // all the monomers in a given sequence must not
          // necessarily have a counterpart in the local list of
          // monoemers. For example, there might be cases in which a
          // given monomer might not bring any charge whatsoever.
          
          int index = Monomer::isCodeInList(seqMonomer->code(),
                                     *mpa_monomerList);
          if (index == -1)
            return -1;
          
          const Monomer *monomer = mpa_monomerList->at(index);
          Q_ASSERT(monomer);
          
          // A monomer can have multiple such "CHEMICAL_GROUP"
          // properties. Indeed, for example for proteins, a monomer
          // might have three such chemical groups(and thus three
          // Prop objects): one for the alpha NH2, one for the alpha
          // COOH and one for a residual chain chemical group, like
          // epsilon NH2 for lysine.
      
          for (int kter = 0; kter < monomer->propList().size(); ++kter)
            {
            Prop *prop = monomer->propList().at(kter);
        
            if(prop->name() != "CHEMICAL_GROUP")
              continue;
            
//          qDebug() << __FILE__ << __LINE__
//                  << "Monomer has property CHEMICAL_GROUP...";
            
            // Get the chemical group out of the property.
            
            ChemicalGroup *chemicalGroup = 
              static_cast<ChemicalGroup *>(prop->data());
        
            if(chemicalGroup->polRule() & MXP_CHEMGROUP_LEFT_TRAPPED)
              {
//              qDebug() << __FILE__ << __LINE__
//                      << "... that is MXP_CHEMGROUP_LEFT_TRAPPED";

                // The chemical group we are dealing with is trapped
                // when the monomer is polymerized on the left end, that
                // is if the monomer is not the left end monomer of the
                // sequence being analyzed.

                // Thus we only can take it into account if one of
                // two conditions are met:

                // 1. The monomer is the left end monomer of the
                // whole polymer sequence.

                // 2. The monomer is the left end monomer of the
                // region selection AND the selection type is
                // oligomers(thus it does not get polymerized to
                // the previous selection region).

                if (jter > 0)
                  {
                  // Clearly we are not dealing with the left
                  // end of the polymer, so check if we have to
                  // account for this chemical group or not.
                  
                  if(!leftMostCoordinates)
                    {
                      // The current Coordinates is not the
                      // left-most Coordinates in the
                      // CoordinateList, thus we cannot consider
                      // it to be the "left end coordinates" of
                      // the CoordinateList. Just continue
                      // without exploring any more.
                      continue;
                    }
                  if(jter == startIndex)
                    {
                      // The current monomer is the first
                      // monomer of Coordinates. We only take
                      // into account the chemical group if each
                      // Coordinates is to be considered an
                      // oligomer.

                      if (m_calcOptions.selectionType() !=
                        SELECTION_TYPE_OLIGOMERS)
                        continue;
                    }
                  }
              }
            
            if(chemicalGroup->polRule() & MXP_CHEMGROUP_RIGHT_TRAPPED)
              {
//              qDebug() << __FILE__ << __LINE__
//                      << "... that is MXP_CHEMGROUP_RIGHT_TRAPPED";

                // See explanations above.

                if (jter < polymerSize - 1)
                  {
                  // Clearly, we are not dealing with the right
                  // end of the polymer.

                  if(!rightMostCoordinates)
                    {
                      // The current Coordinates is not the
                      // right-most Coordinates of the
                      // CoordinateList, thus we cannot consider
                      // it to be the "right end coordinates" of
                      // the CoordinateList. Just continue
                      // without exploring anymore.
                      continue;
                    }
                  if(jter == endIndex)
                    {
                      // The current monomer is the last monomer
                      // of Coordinates. We only take into
                      // account the chemical group if each
                      // Coordinates is to be considered an
                      // oligomer(and not a residual chain).
                      
                      if (m_calcOptions.selectionType() !=
                        SELECTION_TYPE_OLIGOMERS)
                        continue;
                    }
                  }
              }
            
            if(iter == 0 &&
                m_calcOptions.polymerEntities() & 
                MXT_POLYMER_CHEMENT_LEFT_END_MODIF)
              {
                // We are iterating in the monomer that is at the
                // beginning of the polymer sequence. We should
                // test if the chemical group we are dealing with
                // right now has a special rule for the left end
                // of the polymer sequence region.
            
                int ret =
                  accountPolymerEndModif(MXT_POLYMER_CHEMENT_LEFT_END_MODIF,
                                    *chemicalGroup);
                if (ret >= 0)
                  {
//                qDebug() << __FILE__ << __LINE__
//                        << "Accounted for left end modif.";
                  
                  processedChemicalGroups += ret;
                  continue;
                  }
              }
        
            if(iter == polymerSize -1 &&
                m_calcOptions.polymerEntities() & 
                MXT_POLYMER_CHEMENT_RIGHT_END_MODIF)
              {
                int ret =
                  accountPolymerEndModif(MXT_POLYMER_CHEMENT_RIGHT_END_MODIF,
                                    *chemicalGroup);
                if (ret >= 0)
                  {
//                qDebug() << __FILE__ << __LINE__
//                        << "Accounted for right end modif.";
                  
                  processedChemicalGroups += ret;
                  continue;
                  }
              }
        
            if(m_calcOptions.monomerEntities() &
                MXT_MONOMER_CHEMENT_MODIF && 
                seqMonomer->isModified())
              {
                int ret = accountMonomerModif(*seqMonomer, *chemicalGroup);
                if (ret >= 0)
                  {
//                qDebug() << __FILE__ << __LINE__
//                        << "Accounted for monomer modif.";
                  
                  processedChemicalGroups += ret;
                  continue;
                  }
              }
      
            double charge = 
              calculateChargeRatio(chemicalGroup->pka(),
                              chemicalGroup->isAcidCharged());

//          qDebug() << __FILE__ << __LINE__ 
//                  << "Charge:" << charge;
        
            if(charge < 0)
              m_negativeCharges += charge;
            else if (charge > 0)
              m_positiveCharges += charge;
        
//          qDebug() << __FILE__ << __LINE__ 
//                  << "Pos =" << m_positiveCharges
//                  << "Neg = " << m_negativeCharges;
            
            ++processedChemicalGroups;
            }
          // End of 
          // for (int kter = 0; kter < monomer->propList().size(); ++kter)

//        qDebug() << __FILE__ << __LINE__
//                << "End dealing with Monomer:" << seqMonomer->name() 
//                << "position:" << jter + 1;
        }
      // End of
      // for (int jter = startIndex; jter < endIndex + 1; ++jter)
      
//    qDebug() << __FILE__ << __LINE__
//            << "End dealing with Coordinates";
      }
    // End of
    // for (int iter = 0; iter < coordinateList.size(); ++iter)

    // We have finished processing all the Coordinates in the list.
    
    return processedChemicalGroups;
  }


  int
  PkaPhPi::calculatePi()
  {
    int processedChemicalGroups = 0;
    int iteration = 0;

    double netCharge = 0;
    double firstCharge = 0;
    double thirdCharge = 0;
  
    // We of course need monomers ! Instead, we may not need modifs.
    if (!mpa_monomerList)
      {
      m_pi = 0;
      m_ph= 0;
      
      return -1;
      }

    m_ph = 0;

    while(true)
      {
      //       qDebug() << "Current pH being tested:" << m_ph;
      
      processedChemicalGroups = calculateCharges();
      
      if(processedChemicalGroups == -1)
        {
          qDebug() << "Failed to calculate net charge for pH" << m_ph;
        
          m_pi = 0;
          m_ph= 0;
        
          return -1;
        }
      
      netCharge = m_positiveCharges + m_negativeCharges;

      // Note that if the 0.01 tested_ph step is enough to switch the
      // net charge from one excess value to another excess value in
      // the opposite direction, we'll enter an infinite loop.
      //
      // The evidence for such loop is that every other two measures,
      // the net charge of the polymer sequence will be the same.
      //
      // Here we test this so that we can break the loop.
      
            
      ++iteration;

      if(iteration == 1)
        {
          firstCharge = netCharge;
        }
      else if (iteration == 3)
        {
          thirdCharge = netCharge;

          if (firstCharge == thirdCharge)
            break;
        
          iteration = 0;
        
          firstCharge = netCharge;
        }
      
      // At this point we have to test the net charge:

      if(netCharge >= -0.1 && netCharge <= 0.1)
        {
          //        qDebug() << "Breaking loop with netCharge:" << netCharge;
        
          break;
        }
      
      if(netCharge > 0)
        {
          // The test ph is too low.

          m_ph += 0.01;
          //        qDebug() << "Set new pH m_ph += 0.01:" << m_ph 
          //                << "netCharge:" << netCharge;
              
          continue;
        }
      
      if(netCharge < 0)
        {
          // The test ph is too high.
        
          m_ph -= 0.01;
          //        qDebug() << "Set new pH m_ph -= 0.01:" << m_ph
          //                << "netCharge:" << netCharge;
        
          continue;
        }
      }
    // End of
    // while(true)

    // At this point m_pi is m_ph.

    m_pi = m_ph;
    //   qDebug() << "pi is:" << m_pi;
  

    return processedChemicalGroups;
  }


  double 
  PkaPhPi::calculateChargeRatio(double pka, bool acidCharged)
  {
    double aOverAh = 0;
  
    if (pka < 0)
      return 0;
    if (pka > 14)
      return 0;
  
    if (m_ph < 0)
      return 0;
    if (m_ph > 14)
      return 0;
    

    // Example with pKa = 4.25(Glu) ; pH = 4.16, thus we are more
    // acidic than pKa, we expect AH to be greater than A by a small
    // margin.

    aOverAh =(double) pow(10,(m_ph - pka));
    // aOverAh =  0.81283051616409951(confirmed manually)
  
    if (aOverAh < 1)
      {
      /* The solution contains more acid forms(AH) than basic forms
        (A).
      */
      if(acidCharged)
        return(1 - aOverAh);
      else
        // The acid is not charged, that is, it is a COOH.
        // AH = 1 - A
        // A = aOverAh.AH
        // A = aOverAh.(1-A)
        // A = aOverAh - aOverAh.A
        // A(1+aOverAh) = aOverAh
        // A = aOverAh /(1+aOverAh), for us this is 
        // A = 0.81283 / 1.81283 = 0.448

        // And not - aOverAh, that is - aOverAh.
      
        // Below seems faulty(20071204)
        // return(- aOverAh);
      
        // Tentative correction(20071204)
        return(-(aOverAh /(1 + aOverAh)));
      }
  
    else if (aOverAh > 1)
      {
      /* The solution contains more basic forms(A) than acid forms
        (AH).
      */
      if(acidCharged)
        return(1 / aOverAh);
      else
        return(-(1 -(1 / aOverAh)));
      }
    else if (aOverAh == 1)
      {
      /* The solution contains as many acid forms(AH) as basic forms
        (H).
      */
      if(acidCharged)
        return(aOverAh / 2);
      else
        return(- aOverAh / 2);
      }
    else
      Q_ASSERT(0);
  
    return 0;
  }


  int 
  PkaPhPi::accountPolymerEndModif(int endModif, 
                           const ChemicalGroup &group)
  {
    QString modifName;
    ChemicalGroupRule *rule = 0;
  
    int count = 0;
  
    // Get the name of the modification of the polymer(if any) and get
    // the rule dealing with that polymer modification(if any).

    if (endModif == MXT_POLYMER_CHEMENT_LEFT_END_MODIF)
      {
      modifName = m_polymer.leftEndModif().name();
      
      rule = group.findRule("LE_PLM_MODIF", modifName);
      
      // Remember a chemical group is defined like this:

      //       <mnmchemgroup>
      //         <name>N-term NH2</name>
      //    <pka>9.6</pka>
      //    <acidcharged>TRUE</acidcharged>
      //    <polrule>left_trapped</polrule>
      //    <chemgrouprule>
      //      <entity>LE_PLM_MODIF</entity>
      //      <name>Acetylation</name>
      //      <outcome>LOST</outcome>
      //    </chemgrouprule>
      //       </mnmchemgroup>

      }
    else if (endModif == MXT_POLYMER_CHEMENT_RIGHT_END_MODIF)
      {
      modifName = m_polymer.rightEndModif().name();
      
      rule = group.findRule("RE_PLM_MODIF", modifName);
      }
    else
      Q_ASSERT(0);
  

    // The polymer might not be modified, and also the chemical group
    // passed as parameter might not contain any rule about any polymer
    // modification. In that case we just have nothing to do.

    if (modifName.isEmpty())
      {
      if(rule)
        {
          double charge = calculateChargeRatio(group.pka(),
                                      group.isAcidCharged());
          if (charge < 0)
            m_negativeCharges += charge;
          else if (charge > 0)
            m_positiveCharges += charge;
        
          return ++count;
        }
      else
        {
          // The polymer end was NOT modified and the chemical group
          // was NOT eligible for a polymer end modification. This
          // means that we do not have to process it, and we return -1
          // so that the caller function knows we did not do anything
          // and that this chemical group should continue to undergo
          // analysis without skipping it.

          return -1;
        }
      }
    // End of 
    // if (modifName.isEmpty())

    if (!rule)
      {
      // This chemical group was not "designed" to receive any polymer
      // end modification, so we have nothing to do with it and we
      // return -1 so that the caller function knows we did not do
      // anything and that this chemical group should continue to
      // undergo analysis without skipping it.
      
      return -1;
      }
  
    // At this point we know that the chemical group 'group' we are
    // analyzing is eligible for a polymer left end modification and
    // that it is indeed modified with a correcct modification. So we
    // have a rule for it. Let's continue the analysis.

    // Apparently the rule has data matching the ones we are looking
    // for. At this point we should now what action to take for this
    // group.

    if (rule->outcome() == MXP_CHEMGROUP_RULE_LOST)
      {
      // We do not use the current chemical group 'group' because the
      // polymer end's modification has abolished it.
      }
    else if (rule->outcome() == MXP_CHEMGROUP_RULE_PRESERVED)
      {
      double charge = calculateChargeRatio(group.pka(),
                                    group.isAcidCharged());
      if(charge < 0)
        m_negativeCharges += charge;
      else if (charge > 0)
        m_positiveCharges += charge;
      
      return ++count;
      }
    else
      Q_ASSERT(0);
  
    // Whatever we should do with the left/right end monomer's chemgroup,
    // we should take into account the modification itself that might
    // have brought chemgroup(s) worth calculating their intrinsic
    // charges!

    //  Find a modif object in the local list of modif objects, that has
    // the same name as the modification with which the left/right end
    // of the polymer is modified. We'll see what chemgroup(s) this
    // modification brings to the polymer sequence.
                    
    int index = Modif::isNameInList(modifName, *mpa_modifList);
  
    if (index == -1)
      {
      //       qDebug() << __FILE__ << __LINE__ 
      //          << "Information: following modif not in local list:" 
      //          << modifName;
      
      return count;
      }
  
    const Modif *modif = mpa_modifList->at(index);
    Q_ASSERT(modif);
  
    for (int jter = 0; jter < modif->propList().size(); ++jter)
      {
      Prop *prop = modif->propList().at(jter);
      
      if(prop->name() != "CHEMICAL_GROUP")
        continue;
      
      // Get the chemical group out of the property.
      
      const ChemicalGroup *chemicalGroup = 
        static_cast<const ChemicalGroup *>(prop->data());
      
      double charge = 
        calculateChargeRatio(chemicalGroup->pka(),
                        chemicalGroup->isAcidCharged());
      if(charge < 0)
        m_negativeCharges += charge;
      else if (charge > 0)
        m_positiveCharges += charge;
      
      ++count;
      }
  
    return count;
  }


  int 
  PkaPhPi::accountMonomerModif(const Monomer &monomer, 
                        ChemicalGroup &group)
  {
    QString modifName;
    ChemicalGroupRule *rule = 0;
  
    int count = 0;

    // For each modification in the monomer, make the accounting work.

    Q_ASSERT(mpa_modifList);
    Q_ASSERT(mpa_modifList->size());

    for (int iter = 0; iter < monomer.modifList()->size(); ++iter)
      {
      Modif *iterModif = monomer.modifList()->at(iter);
      
      // Get the name of the modification of the monomer(if any) and get
      // the rule dealing with that monomer modification(if any).
      
      modifName = iterModif->name();
      
      rule = group.findRule("MONOMER_MODIF", modifName);
      
      if(modifName.isEmpty())
        {
          // The monomer does not seem to be modified. However, we still
          // have to make sure that the chemgroup that we were parsing is
          // actually a chemgroup suitable for a modif.  If this chemgroup
          // was actually suitable for a monomer modif, but it is not
          // effectively modified, that means that we have to calculate
          // the charge for the non-modified chemgroup...
        
          if (rule)
            {
            double charge = calculateChargeRatio(group.pka(),
                                          group.isAcidCharged());
            if(charge < 0)
              m_negativeCharges += charge;
            else if (charge > 0)
              m_positiveCharges += charge;
            
            return ++count;
            }
          else
            {
            // The current monomer was NOT modified, and the chemgroup
            // was NOT eligible for a monomer modification. This means
            // that we do not have to process it, and we return -1 so
            // that the caller function knows we did not do anything and
            // that this chemgroup should continue to undergo analysis
            // without skipping it.
            
            return -1;
            }
        }
      // End of 
      // if (modifName.isEmpty())
      
      if(!rule)
        {
          // This chemgroup was not "designed" to receive any
          // modification, so we have nothing to do with it, and we return
          // -1 to let the caller know that its processing should be
          // continued in the caller's function space.
        
          return -1;
        }
      
      // At this point, we know that the chemgroup we are analyzing is
      // eligible for a modification and that we have a rule for it. Let's
      // continue the analysis:
      
      // Apparently, a rule object has member data matching the ones we
      // were looking for. At this point we should know what action to
      // take for this chemgroup.
      
      if(rule->outcome() == MXP_CHEMGROUP_RULE_LOST)
        {
          // We do not use the current chemical group 'group' because the
          // monomer modification has abolished it.
        }
      else if (rule->outcome() == MXP_CHEMGROUP_RULE_PRESERVED)
        {
          double charge = calculateChargeRatio(group.pka(),
                                      group.isAcidCharged());
          if (charge < 0)
            m_negativeCharges += charge;
          else if (charge > 0)
            m_positiveCharges += charge;
      
          return ++count;
        }
      else
        Q_ASSERT(0);
  
      // Whatever we should do with this monomer's chemgroup, we should
      // take into account the modification itself that might have brought
      // chemgroup(s) worth calculating their intrinsic charges!
     
      // Find a modif object in the local list of modif objects, that has
      // the same name as the modification with which the monomer is
      // modified. We'll see what chemgroup(s) this modification brings to
      // the polymer sequence.
  
      int index = Modif::isNameInList(modifName, *mpa_modifList);
  
      if(index == -1)
        {
          //       qDebug() << __FILE__ << __LINE__ 
          //            << "Information: following modif not in local list:" 
          //            << modifName;
      
          return count;
        }
  
      Modif *modif = mpa_modifList->at(index);
      Q_ASSERT(modif);
  
      for(int jter = 0; jter < modif->propList().size(); ++jter)
        {
          Prop *prop = modif->propList().at(jter);
        
          if (prop->name() != "CHEMICAL_GROUP")
            continue;
        
          // Get the chemical group out of the property.
        
          const ChemicalGroup *chemicalGroup = 
            static_cast<const ChemicalGroup *>(prop->data());
        
          double charge = 
            calculateChargeRatio(chemicalGroup->pka(),
                            chemicalGroup->isAcidCharged());
          if (charge < 0)
            m_negativeCharges += charge;
          else if (charge > 0)
            m_positiveCharges += charge;
        
          ++count;
        }
      }
  
    return count;
  }

} // namespace massXpert

Generated by  Doxygen 1.6.0   Back to index