Logo Search packages:      
Sourcecode: massxpert version File versions

polymer.cpp

/* massXpert - the true massist's program.
   --------------------------------------
   Copyright(C) 2006,2007 Filippo Rusconi

   http://www.massxpert.org/massXpert

   This file is part of the massXpert project.

   The massxpert project is the successor to the "GNU polyxmass"
   project that is an official GNU project package(see
   www.gnu.org). The massXpert project is not endorsed by the GNU
   project, although it is released ---in its entirety--- under the
   GNU General Public License. A huge part of the code in massXpert
   is actually a C++ rewrite of code in GNU polyxmass. As such
   massXpert was started at the Centre National de la Recherche
   Scientifique(FRANCE), that granted me the formal authorization to
   publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License version 3, as published by the Free Software Foundation.
   

   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this software; if not, write to the

   Free Software Foundation, Inc.,

   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/


/////////////////////// Local includes
#include "polymer.hpp"
#include "modif.hpp"
#include "fragSpec.hpp"
#include "cleaveSpec.hpp"


namespace massXpert
{

  // Version number of the polymer sequence file format.
  // NOTE(review): presumably matches the "version" attribute of the
  // <polseqdata> element; the constant is not used in the visible part
  // of this file -- confirm against the code that writes the file.
  const int POL_SEQ_FILE_FORMAT_VERSION = 5;
  
  //! Constructs a polymer.
00051   Polymer::Polymer(const PolChemDef *polChemDef, 
                const QString &name,
                const QString &code, 
                const QString &author)
    : Ionizable(polChemDef, name),
      m_code(code), m_author(author),
      m_leftEndModif(polChemDef, "NOT_SET"),
      m_rightEndModif(polChemDef, "NOT_SET")
  {
  }


  //! Destroys the polymer.
  /*! First, decrements the reference count of the corresponding polymer
    chemistry definition.
  */
00067   Polymer::~Polymer()
  {
    // Decrement the reference count to the polymer definition
    // chemistry. This will take care of freeing the definition when
    // reference count is 0.
    const_cast<PolChemDef *>(mp_polChemDef)->decrementRefCount();
  }
  

  //! Sets the code.
  /*!   
    \param code The new code.
  */
  void 
00081   Polymer::setCode(const QString &code)
  {
    m_code = code;
  }


  //! Gives the code of the polymer.
  /*!
    \return A copy of the stored code.
  */
  QString Polymer::code() const
  {
    return m_code;
  }


  //! Sets the user name.
  /*! 
  
    \param author The new user name.
  */
  void 
00104   Polymer::setAuthor(const QString &author)
  {
    m_author = author;
  }


  //! Gives the author (user name) of the polymer.
  /*!
    \return A copy of the stored author name.
  */
  QString Polymer::author() const
  {
    return m_author;
  }


  //! Sets the file path.
  /*! 
    \param filePath New file path.
  */
  void 
00126   Polymer::setFilePath(const QString &filePath)
  {
    m_filePath = filePath;
  }


  //! Gives the file path of the polymer sequence.
  /*!
    \return A copy of the stored file path.
  */
  QString Polymer::filePath() const
  {
    return m_filePath;
  }


  //! Sets the date and time(date, hours, minutes, seconds)
  /*!  \param dateTime The new date and time.
   */
  void 
00147   Polymer::setDateTime(const QString &dateTime)
  {
    m_dateTime = QDateTime::fromString((QString) dateTime,
                              "yyyy-MM-dd:mm:ss");
  }


  //! Gives the date and time as text.
  /*!
    \return The stored date and time formatted with the
    "yyyy-MM-dd:mm:ss" format.
  */
  QString Polymer::dateTime() const
  {
    const QString format("yyyy-MM-dd:mm:ss");

    return m_dateTime.toString(format);
  }



  bool
  Polymer::setLeftEndModif(const QString &name)
  {
    const QList<Modif *> &refList = polChemDef()->modifList();

    if (name.isNull() || name.isEmpty())
      {
      // Reset the modif to nothing.
      m_leftEndModif.reset();
      }
  
    if (Modif::isNameInList(name, refList, &m_leftEndModif) != -1)
      {
      if(!m_leftEndModif.calculateMasses())
        return false;
      else 
        return true;
      }
    else
      return false;
  }


  //! Sets the left end modification from an existing modification.
  /*!
    \param modif Modification that is copied into the left end
    modification.

    \return The result of calculating the masses of the copy.
  */
  bool
  Polymer::setLeftEndModif(const Modif &modif)
  {
    m_leftEndModif = modif;

    return m_leftEndModif.calculateMasses();
  }


  //! Gives read-only access to the left end modification.
  /*!
    \return A const reference to the left end modification.
  */
  const Modif &Polymer::leftEndModif() const
  {
    return m_leftEndModif;
  }



  bool
  Polymer::setRightEndModif(const QString &name)
  {
    const QList<Modif *> &refList = polChemDef()->modifList();

    if (name.isNull() || name.isEmpty())
      {
      // Reset the modif to nothing.
      m_rightEndModif.reset();
      }
  
    if (Modif::isNameInList(name, refList, &m_rightEndModif) != -1)
      {
      if(!m_rightEndModif.calculateMasses())
        return false;
      else 
        return true;
      }
    else
      return false;
  }


  //! Sets the right end modification from an existing modification.
  /*!
    \param modif Modification that is copied into the right end
    modification.

    \return The result of calculating the masses of the copy.
  */
  bool
  Polymer::setRightEndModif(const Modif &modif)
  {
    m_rightEndModif = modif;

    return m_rightEndModif.calculateMasses();
  }


  //! Gives read-only access to the right end modification.
  /*!
    \return A const reference to the right end modification.
  */
  const Modif &Polymer::rightEndModif() const
  {
    return m_rightEndModif;
  }


  //! Gives read-only access to the list of cross-links.
  /*!
    \return A const reference to the cross-link list of this polymer.
  */
  const CrossLinkList &Polymer::crossLinkList() const
  {
    return m_crossLinkList;
  }


  //! Gives modifiable access to the list of cross-links.
  /*!
    \return A pointer to the cross-link list member of this polymer.
  */
  CrossLinkList *Polymer::crossLinkListPtr()
  {
    return &m_crossLinkList;
  }


  //! Undoes every cross-link that involves a monomer.
  /*! Each cross-link of the polymer for which involvesMonomer() does
    not return -1 is passed to uncrossLink(). The result of
    uncrossLink() is not checked.

    \param monomer Monomer about to be removed. Cannot be 0.

    \return Always true.
  */
  bool
  Polymer::prepareMonomerRemoval(Monomer *monomer)
  {
    Q_ASSERT(monomer);

    // The list is walked from its last item down to the first one
    // because uncrossLink() removes the cross-link from the list, which
    // would otherwise shift the items still to be visited.
    for (int index = m_crossLinkList.size() - 1; index >= 0; --index)
      {
        CrossLink *candidate = m_crossLinkList.at(index);

        if (candidate->involvesMonomer(monomer) == -1)
          continue;

        uncrossLink(candidate);
      }

    return true;
  }


  bool 
00302   Polymer::removeMonomerAt(int index)
  {
    Q_ASSERT(index > -1);
    Q_ASSERT(index < size());
    
    Monomer *monomer = const_cast<Monomer *>(at(index));

    if (!prepareMonomerRemoval(monomer))
      return false;
    
    m_monomerList.removeAt(index);
    
    delete monomer;
    
    return true;
  }
  

  // MASS CALCULATION FUNCTIONS
  //! Adds the calculated masses to the masses of this polymer.
  /*! Same as calculateMasses(calcOptions, reset) with reset set to
    false: the current masses are kept and the new ones are added to
    them.

    \param calcOptions Options driving the calculation.

    \return The result of calculateMasses().
  */
  bool
  Polymer::accountMasses(const CalcOptions &calcOptions)
  {
    const bool resetMasses = false;

    return calculateMasses(calcOptions, resetMasses);
  }


  //! Adds the calculated masses of a polymer to the given masses.
  /*! Same as the five-parameter calculateMasses() with reset set to
    false: \p mono and \p avg are not zeroed before the calculation.

    \param polymer Polymer for which the masses are calculated.

    \param calcOptions Options driving the calculation.

    \param mono Monoisotopic mass to be incremented.

    \param avg Average mass to be incremented.

    \return The result of calculateMasses().
  */
  bool
  Polymer::accountMasses(Polymer *polymer,
                         const CalcOptions &calcOptions,
                         double *mono, double *avg)
  {
    const bool resetMasses = false;

    return calculateMasses(polymer, calcOptions, mono, avg, resetMasses);
  }

  
  bool 
  Polymer::calculateMasses(const CalcOptions &calcOptions, bool reset)
  {
    return calculateMasses(this, calcOptions, &m_mono, &m_avg, reset);
  }


  //! Calculates the masses of a polymer into caller-provided variables.
  /*! The following contributions are added, in this order, to \p mono
    and \p avg:

    - the monomers of each Coordinates item of
      calcOptions.coordinateList() (their masses are recalculated first
      when calcOptions.isDeepCalculation() is true);

    - the cross-links that are fully encompassed by the coordinates, if
      calcOptions.monomerEntities() has MXT_MONOMER_CHEMENT_CROSS_LINK
      set. The number of partially encompassed cross-links is emitted
      through the polymer's crossLinksPartiallyEncompassedSignal();

    - the left and/or right caps, as requested by calcOptions.capping();

    - the left and/or right end modifications, as requested by
      calcOptions.polymerEntities().

    The results of the accounting helpers are only checked with
    Q_ASSERT.

    \param polymer Polymer to work on. Cannot be 0.

    \param calcOptions Options driving the calculation. Its coordinate
    list is asserted not to be empty.

    \param mono Monoisotopic mass to be incremented. Cannot be 0.

    \param avg Average mass to be incremented. Cannot be 0.

    \param reset If true, \p mono and \p avg are set to 0 first.

    \return Always true.
  */
  bool 
  Polymer::calculateMasses(Polymer *polymer, 
                      const CalcOptions &calcOptions,
                      double *mono, double *avg, bool reset)
  {
    Q_ASSERT(polymer);
    Q_ASSERT(mono && avg);
  
    int ret = 0;
    
    if (reset)
      {
      // Reset the masses to 0.
      *mono = 0;
      *avg = 0;
      }
  
    // The calcOptions parameter holds a CoordinateList instance
    // listing all the coordinates of the different(if any) region
    // selections of the polymer sequence. This CoordinateList is
    // never empty, as it should at least contain the pseudo-selection
    // of the sequence, that is [start of sequence, cursor index] or
    // the [-1, -1] values for whole sequence mass
    // calculation. Iterate in this CoordinateList and for each item
    // account for the monomers it spans.

    Q_ASSERT(calcOptions.coordinateList().size());

    // For each Coordinates item in the calcOptions.coordinateList()
    // list of such items, perform the mass calculation.
    
    
    for (int iter = 0; iter < calcOptions.coordinateList().size(); ++iter)
      {
      // Local copy of the Coordinates item we are iterating into, so
      // that the adjustments below do not modify calcOptions.
      Coordinates coordinates(*(calcOptions.coordinateList().at(iter)));
      
      // If the start value is less than 0(typically it is set to
      // -1) then set it to 0(the first monomer of the sequence).

      if(coordinates.start() < 0)
        coordinates.setStart(0);

      // If the end value is less than 0(typically it is set to -1)
      // then set it to the index of the last monomer so that the
      // whole sequence is taken into account in the calculation.

      if(coordinates.end() < 0)
        coordinates.setEnd(polymer->size() - 1);

      // If the end value is beyond the last monomer, set it to the
      // index of the last monomer.
      if(coordinates.end() >= polymer->size())
        coordinates.setEnd(polymer->size() - 1);
      
      // First account for the residual chain masses.

//    qDebug() << __FILE__ << __LINE__
//            << "calculateMasses: accounting for residual chain indices"
//            << "[" <<  coordinates.start() << "--"
//            << coordinates.end() << "]";
      
      for(int jter = coordinates.start(); jter <= coordinates.end(); ++jter)
        {
          //          qDebug() << __FILE__ << __LINE__ 
          // << "Going to call at() with value" << jter;
          
          Monomer *monomer = const_cast<Monomer *>(polymer->at(jter)); 
          
          // A deep calculation recalculates the masses of the monomer
          // before they are accounted for.
          if (calcOptions.isDeepCalculation())
            monomer->calculateMasses(calcOptions.monomerEntities());
          
          monomer->accountMasses(mono, avg);
        }
      }
    
    // Even if we are not in the residual chain loop, we have to account
    // for the crossLinks, if so required. The crossLinks are a
    // monomer chemical entity, but because it is unpractical to
    // calculate their ponderable contribution in the loop above, we
    // deal with them here. This is difficult stuff. In fact, the
    // crossLinks, which in reality belong to at least two monomers
    //(monomers can be engaged in more than a crossLink), are not
    // stored as properties in the monomers(contrary to monomer
    // modifications, for example). The crossLinks are stored in a list
    // of such instances in the polymer(m_crossLinkList of CrossLink
    // pointers). Now, the point is: if one of the monomers of a
    // crossLink is selected but not the other partners, then what
    // should we do about that crossLink accounting ?
  
    if (calcOptions.monomerEntities() & MXT_MONOMER_CHEMENT_CROSS_LINK)
      {
      // We have to take into account the crossLinks. Hmmm... hard
      // task. We iterate in the crossLink list and for each crossLink
      // check if it involves monomers that *all* are contained in the
      // sequence stretch(or sequence stretches, that is a number of
      // Coordinates items in the calcOptions.coordinateList()) we're
      // calculating the mass of. If at least one monomer of any
      // crossLink is not contained in the sequence stretch(es), then
      // increment a count variable and do not account the mass.
      
      const CrossLinkList &crossLinkList = polymer->crossLinkList();

      int crossLinkPartial = 0;

      for(int jter = 0; jter < crossLinkList.size(); ++jter)
        {
          CrossLink *crossLink = crossLinkList.at(jter);
        
          ret = crossLink->encompassedBy(calcOptions.coordinateList());
        
          if (ret == MXP_CROSS_LINK_ENCOMPASSED_FULL)
            {
//          qDebug() << __FILE__ << __LINE__
//                  << "CrossLink at iter:" << jter
//                  << "is fully encompassed";

            // The crossLink is fully encompassed by our monomer
            // stretch, so we should take it into account.

            ret = crossLink->accountMasses(mono, avg);
            
            Q_ASSERT(ret);
            }
          else if (ret == MXP_CROSS_LINK_ENCOMPASSED_PARTIAL)
            {
//          qDebug() << __FILE__ << __LINE__
//                  << "CrossLink at iter:" << jter
//                  << "is partially encompassed";
            
            ++crossLinkPartial;
            }
          else
            {
//          qDebug() << __FILE__ << __LINE__
//                  << "CrossLink at iter:" << jter
//                  << "is not encompassed at all";
            }
        }
      
      // Let the listeners know how many crossLinks were left out
      // because they were only partially encompassed.
      emit(polymer->crossLinksPartiallyEncompassedSignal(crossLinkPartial));
      }
        
    // We now have to account for the left/right cappings. However,
    // when there are multiple region selections(that is multiple
    // Coordinate elements in the calcOptions.coordinateList()) it is
    // necessary to know if the user wants each of these Coordinates
    // to be considered real oligomers(each one with its left/right
    // caps) or as residual chains. Thus there are two cases:

    // 1. Each Coordinates item should be considered an oligomer
    //(SelectionType is SELECTION_TYPE_OLIGOMERS), thus for each item
    // the left and right caps should be accounted for. This is
    // typically the case when the user selects multiple regions to
    // compute the mass of cross-linked oligomers.

    // 2. Each Coordinates item should be considered a residual chain
    //(SelectionType is SELECTION_TYPE_RESIDUAL_CHAINS), thus only
    // one item should see its left and right caps accounted for. This
    // is typically the case when the user selects multiple regions
    // like it would select repeated sequence elements in a polymer
    // sequence: all the regions selected are treated as a single
    // oligomer.

    // Holds the number of times the caps are to be accounted for.
    int times = 0;
    
    if (calcOptions.selectionType() == SELECTION_TYPE_RESIDUAL_CHAINS)
      {
//    qDebug() << __FILE__ << __LINE__
//            << "SELECTION_TYPE_RESIDUAL_CHAINS";
      
      times = 1;
      }
    else
      {
//    qDebug() << __FILE__ << __LINE__
//            << "SELECTION_TYPE_OLIGOMERS";
      
      times = calcOptions.coordinateList().size();
      }
    
    // Account for the left and right cap masses, if so required.
    if (calcOptions.capping() & MXT_CAP_LEFT)
      {
      ret = Polymer::accountCappingMasses(polymer,
                                   MXT_CAP_LEFT,
                                   mono, avg, times);
      Q_ASSERT(ret);
      }
    
    if (calcOptions.capping() & MXT_CAP_RIGHT)
      {
      ret = Polymer::accountCappingMasses(polymer,
                                   MXT_CAP_RIGHT,
                                   mono, avg, times);
      
      Q_ASSERT(ret);
      }
    
    // Account for the left and right modification masses, if so
    // required and the region(s) require(s) it: we have to make it
    // clear if the selection encompasses indices 0(left end) and/or
    // polymerSize-1(right end).

    // Note that if we are forced to take into account either or both
    // the left/right end modif, then even if the selected region does
    // not encompass the end(s), their modif(s) must be taken into
    // account.
    
    if (calcOptions.polymerEntities() & MXT_POLYMER_CHEMENT_LEFT_END_MODIF)
      {
      if(calcOptions.polymerEntities() &
          MXT_POLYMER_CHEMENT_FORCE_LEFT_END_MODIF)
        {
         ret = Polymer::accountEndModifMasses 
           (polymer,
             MXT_POLYMER_CHEMENT_LEFT_END_MODIF,
             mono, avg);
          
          Q_ASSERT(ret);
        }
      else
        {
          // Not forced: only account for the left end modif if the
          // selection includes the first monomer.
          if (calcOptions.coordinateList().encompassIndex(0))
            {
            ret = Polymer::accountEndModifMasses 
             (polymer,
               MXT_POLYMER_CHEMENT_LEFT_END_MODIF,
               mono, avg);
            
            Q_ASSERT(ret);
            }
        }
      }
    
    if (calcOptions.polymerEntities() & MXT_POLYMER_CHEMENT_RIGHT_END_MODIF)
      {
      if(calcOptions.polymerEntities() &
          MXT_POLYMER_CHEMENT_FORCE_RIGHT_END_MODIF)
        {
          ret = Polymer::accountEndModifMasses 
           (polymer,
             MXT_POLYMER_CHEMENT_RIGHT_END_MODIF,
             mono, avg);
          
          Q_ASSERT(ret);
        }
      else
        {
          // Not forced: only account for the right end modif if the
          // selection includes the last monomer.
          if (calcOptions.coordinateList().
            encompassIndex(polymer->size() - 1))
            {
            ret = Polymer::accountEndModifMasses 
             (polymer,
               MXT_POLYMER_CHEMENT_RIGHT_END_MODIF,
               mono, avg);
            
            Q_ASSERT(ret);
            }
        }
      }
    
    //   qDebug() <<__FILE__ << __LINE__ 
    //          << "CalculateMasses Mono:" 
    //          << polymer->mono() 
    //          << "Avg:" 
    //          << polymer->avg();
  
    return true;
  }


  bool 
  Polymer::accountCappingMasses(int how, int times)
  {
    return accountCappingMasses(this, how, &m_mono, &m_avg, times);
  }


  //! Accounts for a cap of a polymer in caller-provided masses.
  /*! The cap formula is taken from the polymer chemistry definition of
    \p polymer and accounted for using the atom list of that same
    definition. Only one cap is handled per call: the left cap takes
    precedence if both the left and right bits are set in \p how.

    \param polymer Polymer of which the cap is accounted for. Cannot
    be 0.

    \param how Which cap to account for (MXT_CAP_* value).

    \param mono Monoisotopic mass to be incremented. Cannot be 0.

    \param avg Average mass to be incremented. Cannot be 0.

    \param times Number of times the cap is accounted for.

    \return The result of the formula's mass accounting, or true
    without doing anything if the MXT_CAP_NONE branch is taken.
  */
  bool
  Polymer::accountCappingMasses(Polymer *polymer, int how,
                                double *mono, double *avg, int times)
  {
    Q_ASSERT(polymer);
    Q_ASSERT(mono && avg);

    const PolChemDef *chemDef = polymer->polChemDef();
    const QList<Atom *> &atomRefList = chemDef->atomList();

    Formula capFormula;

    // NOTE(review): if MXT_CAP_NONE is defined as 0, the branch testing
    // it below can never be taken and a 'how' of MXT_CAP_NONE ends up
    // in the assertion -- confirm the enum values.
    if (how & MXT_CAP_LEFT)
      capFormula = chemDef->leftCap();
    else if (how & MXT_CAP_RIGHT)
      capFormula = chemDef->rightCap();
    else if (how & MXT_CAP_NONE)
      return true;
    else
      Q_ASSERT(0);

    return capFormula.accountMasses(atomRefList, mono, avg, times);
  }


  bool 
  Polymer::accountEndModifMasses(int how)
  {
    return accountEndModifMasses(this, how, &m_mono, &m_avg);
  }


  //! Accounts for the end modification(s) of a polymer in a ponderable.
  /*!
    \param polymer Polymer of which the end modification(s) are
    accounted for. Cannot be 0.

    \param how Which end modification(s) to account for
    (MXT_POLYMER_CHEMENT_*_END_MODIF bits).

    \param ponderable Object whose masses, reached through rmono() and
    ravg(), are incremented. Cannot be 0.

    \return The result of the overload taking the two mass pointers.
  */
  bool
  Polymer::accountEndModifMasses(Polymer *polymer, int how,
                                 Ponderable *ponderable)
  {
    Q_ASSERT(polymer);
    Q_ASSERT(ponderable);

    double &monoRef = ponderable->rmono();
    double &avgRef = ponderable->ravg();

    return accountEndModifMasses(polymer, how, &monoRef, &avgRef);
  }


  //! Accounts for the end modification(s) of a polymer in given masses.
  /*! The calculation is performed on copies of the polymer's end
    modifications, so that the polymer's own modifications are not
    touched. The left end is handled before the right end.

    \param polymer Polymer of which the end modification(s) are
    accounted for. Cannot be 0.

    \param how Which end modification(s) to account for: any combination
    of the MXT_POLYMER_CHEMENT_LEFT_END_MODIF and
    MXT_POLYMER_CHEMENT_RIGHT_END_MODIF bits.

    \param mono Monoisotopic mass to be incremented. Cannot be 0.

    \param avg Average mass to be incremented. Cannot be 0.

    \return false as soon as one accounting fails, true otherwise.
  */
  bool
  Polymer::accountEndModifMasses(Polymer *polymer, int how,
                                 double *mono, double *avg)
  {
    Q_ASSERT(polymer);
    Q_ASSERT(mono && avg);

    const bool wantLeft = (how & MXT_POLYMER_CHEMENT_LEFT_END_MODIF) != 0;
    const bool wantRight = (how & MXT_POLYMER_CHEMENT_RIGHT_END_MODIF) != 0;

    if (wantLeft)
      {
        Modif leftCopy(polymer->leftEndModif());

        if (!leftCopy.accountMasses(mono, avg))
          return false;
      }

    if (wantRight)
      {
        Modif rightCopy(polymer->rightEndModif());

        if (!rightCopy.accountMasses(mono, avg))
          return false;
      }

    return true;
  }


  //! Performs a cross-link in this polymer.
  /*! Must be called once the cross-link is fully set up. If the
    cross-link validates, it is appended to the cross-link list of the
    polymer and crossLinkChangedSignal() is emitted so that interested
    parties(the cross-link dialog, for example) can refresh their data.

    \param crossLink Cross-link to add to the polymer. Cannot be 0.

    \return true if the cross-link was added, false if it failed to
    validate(in which case nothing is changed).
  */
  bool
  Polymer::crossLink(CrossLink *crossLink)
  {
    Q_ASSERT(crossLink);

    if (crossLink->validate())
      {
        m_crossLinkList.append(crossLink);

        emit(crossLinkChangedSignal(this));

        return true;
      }

    return false;
  }


  //! Undoes a cross-link of this polymer.
  /*! If the cross-link validates, it is removed from the cross-link
    list of the polymer and deleted, then crossLinkChangedSignal() is
    emitted so that interested parties(the cross-link dialog, for
    example) can refresh their data.

    \param crossLink Cross-link to remove and destroy. Cannot be 0. The
    pointer must not be used after a successful call.

    \return true if the cross-link was removed and deleted, false if it
    failed to validate(in which case nothing is changed).
  */
  bool
  Polymer::uncrossLink(CrossLink *crossLink)
  {
    Q_ASSERT(crossLink);

    if (!crossLink->validate())
      return false;

    // NOTE(review): if the cross-link is not in the list, indexOf()
    // yields -1 and that value is handed to removeAt(); the cross-link
    // is deleted nonetheless. Confirm that callers only pass listed
    // cross-links.
    const int position = m_crossLinkList.indexOf(crossLink);
    m_crossLinkList.removeAt(position);

    delete crossLink;

    emit(crossLinkChangedSignal(this));

    return true;
  }



  //! Parses an XML element holding a stretch of monomer codes.
  /*! Parses the XML element passed as argument and get its text data
   (this is called XML rendering). The string(a stretch of monomer
    codes) is then converted into a list of dynamically allocated
    Monomer instances. The monomers already in the list are not
    removed, as this function might be called any number of times during
    parsing of a single polymer sequence file.

    \param element XML element to be parsed and rendered.
  
    \return true if parsing and conversion of the text to a monomer list
    were successful, false otherwise.

    \sa makeMonomerList().
  */
  bool
00780   Polymer::renderXmlCodesElement(const QDomElement &element)
  {
    QString sequence;
  
    // We are getting this:
    //  <codes>MEFEEDWYGEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGT</codes>
    // We have to make monomers and add them to the list of monomers.
  
    if (element.tagName() != "codes")
      return false;
  
    m_monomerText = element.text();

    if (makeMonomerList(mp_polChemDef, false) == -1) 
      return false;
    else
      return true;
  }


  //! Extracts the polymer chemistry definition name from a file.
  /*! Parses the \p filePath polymer sequence file, which is expected to
    start like this:

    \verbatim
    <polseqdata>
      <polchemdef_name>protein</polchemdef_name>
      ...
    \endverbatim

    \param filePath File path of the polymer sequence file.

    \return The text of the \c \<polchemdef_name\> element. A null
    string is returned if the file cannot be opened or parsed, or if its
    root element is not \c \<polseqdata\>. An empty string is returned if
    the first child of the root element is not \c \<polchemdef_name\>.
  */
  QString
  Polymer::xmlPolymerFileGetPolChemDefName(const QString &filePath)
  {
    QDomDocument doc("polSeqData");

    QFile file(filePath);

    // The error paths below return a null QString explicitly: the
    // function returns a QString, not a bool.
    if (!file.open(QIODevice::ReadOnly))
      return QString();

    if (!doc.setContent(&file))
      {
        file.close();
        return QString();
      }

    file.close();

    QDomElement element = doc.documentElement();

    if (element.tagName() != "polseqdata")
      {
        qDebug() << __FILE__ << __LINE__
                 << "Polymer sequence file is erroneous\n";
        return QString();
      }

    // <polchemdef_name> has to be the first child of the root element.
    QDomElement child = element.firstChildElement();

    if (child.tagName() != "polchemdef_name")
      return QString("");

    return child.text();
  }


  bool 
  Polymer::renderXmlPolymerModifElement(const QDomElement &element,
                               int version)
  {
    if (element.tagName() != "le_modif" && element.tagName() != "re_modif")
      return false;
  
    if (version < 3)
      return renderXmlPolymerModifElement(element);
    else if (version == 3)
      return renderXmlPolymerModifElementV3(element);
    else if (version >=4)
      return renderXmlPolymerModifElementV4(element);

    return false;
  }


  bool 
  Polymer::renderXmlPolymerModifElement(const QDomElement &element)
  {
    const QList<Modif*> &refList = mp_polChemDef->modifList();

    if (element.tagName() == "le_modif")
      {
      // Verify that the modification name is know to the 
      // polymer chemistry definition.
      if(Modif::isNameInList(element.text(), refList, 
                         &m_leftEndModif) == -1)
        return false;
      
      if(!m_leftEndModif.calculateMasses())
        return false;
      else
        return true;
      }
    else if (element.tagName() == "re_modif")
      {
      // Verify that the modification name is know to the 
      // polymer chemistry definition.
      if(Modif::isNameInList(element.text(), refList, 
                         &m_rightEndModif) == -1)
        return false;
      
      if(!m_rightEndModif.calculateMasses())
        return false;
      else
        return true;
      }

    return false; 
  }



  bool 
  Polymer::renderXmlPolymerModifElementV3(const QDomElement &element)
  {
    // This new version of the modification renderer is based on the
    // <mdf> element that is located inside <le_modif> or <re_modif>
    // elements.

    QDomElement child;
  
    if (element.tagName() == "le_modif")
      {
      // Go down to the <mdf> element.
      child = element.firstChildElement();

      if(!m_leftEndModif.renderXmlMdfElementV2(child))
        return false;
      else
        return true;
      }
    else if (element.tagName() == "re_modif")
      {
      // Go down to the <mdf> element.
      child = element.firstChildElement();
      
      if(!m_rightEndModif.renderXmlMdfElementV2(child))
        return false;
      else
        return true;
      } 

    return false;
  }


  bool 
  Polymer::renderXmlPolymerModifElementV4(const QDomElement &element)
  {
    // This new version of the modification renderer is based on the
    // <mdf> element that is located inside <le_modif> or <re_modif>
    // elements. But we now test the existence of <mdf> as it is not
    // required anymore inside of the l"/re_modif element, because it is
    // that le/re_modif element that has become compulsory after the
    // <polseq> element since version 4.

    QDomElement child;
  
    if (element.tagName() == "le_modif")
      {
      // Go down to the <mdf> element.
      child = element.firstChildElement();

      if(child.isNull())
        return true;
      
      if(!m_leftEndModif.renderXmlMdfElementV2(child))
        return false;
      else
        return true;
      }
    else if (element.tagName() == "re_modif")
      {
      // Go down to the <mdf> element.
      child = element.firstChildElement();
      
      if(child.isNull())
        return true;
      
      if(!m_rightEndModif.renderXmlMdfElementV2(child))
        return false;
      else
        return true;
      } 

    return false;
  }


  bool
  Polymer::renderXmlCrossLinksElement(const QDomElement &element,
                               int version)
  {
    // Up to version 4 there is no need for the version param.
    if (version)
      ;
  
    QDomElement child;
    QDomElement indentedChild;

    // element is <crosslinks>

    //   <crosslinks>
    //     <crosslink>
    //       <name>DisulfideBond</name>
    //       <targets>;2;6;</targets>
    //     </crosslink>
    //   </crosslinks>

    if (element.tagName() != "crosslinks")
      return false;
  
    child = element.firstChildElement();
      
    // There can be any number of <crosslink> elements.
    while(!child.isNull())
      {
      if(child.tagName() != "crosslink")
        return false;

      indentedChild = child.firstChildElement();

      if(indentedChild.tagName() != "name")
        return false;
      
      // We actually do have a <crosslink> element, so we can allocate
      // one now.

      CrossLink *aCrossLink = new CrossLink(mp_polChemDef,
                                     this,
                                     indentedChild.text(),
                                     "NOT_SET",
                                     "NOT_SET");
      
      // And now find in the polymer chemistry definition the right
      // crossLinker and clone that one into our newly allocated one.
      
      if(!mp_polChemDef->crossLinker 
         (indentedChild.text(),
           static_cast<CrossLinker *>(aCrossLink)))
        {
          delete aCrossLink;
          return false;
        }
      
      // At this point the crossLinker superclass of crossLink is
      // updated with the ref one.

      indentedChild = indentedChild.nextSiblingElement();
      
      if(indentedChild.tagName() != "targets")
        {
          delete aCrossLink;
          return false;
        }
      
      if(aCrossLink->populateMonomerList(indentedChild.text()) == -1)
        {
          delete aCrossLink;
          return false;
        }
      
      indentedChild = indentedChild.nextSiblingElement();
      
      if(!indentedChild.isNull())
        {
          if (indentedChild.tagName() != "comment")
            {
            delete aCrossLink;
            return false;
            }
        }

      // At this point the crossLink element is finished rendering,
      // all we have to do is perform the crossLink proper.

      if(!crossLink(aCrossLink))
        {
          delete aCrossLink;
          return false;
        }

      child = child.nextSiblingElement();
      }
  
    return true;
  }



  //! Parses a polymer sequence file.
  /*! The file is parsed and all the data encountered during parsing are
    set to member data.
  
    \param filePath The file path of the polymer sequence file.

    \return true if parsing succeeded, false otherwise.
  */
  bool 
01095   Polymer::renderXmlPolymerFile(QString filePath)
  {
    QString localFilePath;
  
    QDomDocument doc("polSeqData");
    QDomElement element;  
    QDomElement child;
    QDomElement indentedChild;
  
    Monomer *monomer = 0;

    /*
      <polseqdata>
      <polchemdef_name>protein</polchemdef_name>
      <name>Sample</name>
      <code>SP2003</code>
      <author>rusconi</author>
      <datetime>1967-09-224:09:23</datetime>
    */

    if (filePath.isEmpty())
      localFilePath = m_filePath;
    else
      localFilePath = filePath;
  
    QFile file(localFilePath);
  
    if (!file.open(QIODevice::ReadOnly))
      return false;
  
    if (!doc.setContent(&file)) 
      {
      file.close();
      return false;
      }
  
    file.close();

    element = doc.documentElement();

    if (element.tagName() != "polseqdata")
      {
      qDebug() << __FILE__ << __LINE__
              << "Polymer sequence file is erroneous\n";
      return false;
      }
  
    ///////////////////////////////////////////////
    // Check the version of the document.
  
    QString text;
  
    if (!element.hasAttribute("version"))
      text = "1";
    else
      text = element.attribute("version");
  
    bool ok = false;
  
    int version = text.toInt(&ok, 10);

    if (version < 1 || !ok)
      {
      qDebug() << __FILE__ << __LINE__
              << "Polymer sequence file has bad version number: "
              << version;
      
      return false;
      }
  
    // <polchemdef_name>
    child = element.firstChildElement();
    if (child.tagName() != "polchemdef_name")
      return false;
    //    mp_polChemDef->setName(child.text());

    // <name>
    child = child.nextSiblingElement();
    if (child.tagName() != "name")
      return false;
    m_name = child.text();
  
    // <code>
    child = child.nextSiblingElement();
    if (child.tagName() != "code")
      return false;
    m_code = child.text();
  
    // <author>
    child = child.nextSiblingElement();
    if (child.tagName() != "author")
      return false;
    m_author = child.text();
  
    // <datetime>
    child = child.nextSiblingElement();
    if (child.tagName() != "datetime")
      return false;
    m_dateTime = QDateTime::fromString(child.text(), "yyyy-MM-dd:mm:ss");

    // <polseq>
    child = child.nextSiblingElement();

    if (child.tagName() != "polseq")
      return false;

    /*
      <polseq>
      <codes>MEFEEDF</codes>
      <monomer>
      <code>S</code>
      <prop>
      <name>MODIF</name>
      <data>Phosphorylation</data>
      </prop>
      </monomer>
      <codes>GRKDKNFLKMGRK</codes>
      </polseq>
      <le_modif>
      <mdf>
      <name>Acetylation</name>
      <formula>-H+C2H3O</formula>
      <targets>;K;</targets>
      </mdf>
      </le_modif>
      <re_modif>
      <mdf>
      <name>Phosphorylation</name>
      <formula>-H+H2PO3</formula>
      <targets>;S;T;Y;</targets>
      </mdf>
      </re_modif>
    */

    // There can be any number of <codes> and <monomer> elements, in
    // whatever order.

    indentedChild = child.firstChildElement();

    while(!indentedChild.isNull())
      {
      if(indentedChild.tagName() == "codes")
        {
          if (!renderXmlCodesElement(indentedChild))
            {
            return false;
            }
        }
      else if (indentedChild.tagName() == "monomer")
        {
          monomer = new Monomer(mp_polChemDef, "NOT_SET");
        
          if (!monomer->renderXmlMonomerElement(indentedChild, version))
            {
            delete monomer;
            
            return false;
            }
          m_monomerList.append(monomer);
        }
      else
        return false;
      
      indentedChild = indentedChild.nextSiblingElement();
      }

    // Done with the <polseq> element. Depending on the version of the
    // file, we'll have two ways to perform the next steps.

    if (version <= 3)
      {
      // And now the potential polymer sequence left/right modifications.
      
      child = child.nextSiblingElement();
      
      while(!child.isNull())
        {
          if (!renderXmlPolymerModifElement(child, version))
            return false;
        
          child = child.nextSiblingElement();
        }
      }
    else
      {
      //  Go on to the next element(has to be <le_modif>.
      
      child = child.nextSiblingElement();

      if(child.tagName() != "le_modif")
        return false;
  
      if(!renderXmlPolymerModifElement(child, version))
        return false;

      // Go on to the next element(has to be <re_modif>.

      child = child.nextSiblingElement();

      if(child.tagName() != "re_modif")
        return false;
  
      if(!renderXmlPolymerModifElement(child, version))
        return false;

      // Go on to the next element(has to be <crosslinks>.
  
      child = child.nextSiblingElement();
  
      if(child.tagName() != "crosslinks")
        return false;
  
      if(!renderXmlCrossLinksElement(child, version))
        return false;
      }
    
    setFilePath(localFilePath);
  
    return true;
  }


  //! Creates the XML DTD for a polymer sequence file.
  /*! The DTD is the one that writeXmlFile() writes at the top of the
    file. It declares the \c comment element that
    formatXmlCrossLinksElement() writes inside each \c crosslink
    element; \c comment is declared optional so that documents lacking
    it remain valid.

    \return The DTD in a dynamically allocated string that the caller
    has to delete after use.
   */
  QString *
  Polymer::formatXmlDtd()
  {
    QString *string = new QString
      (
       "<?xml version=\"1.0\"?>\n"
       "<!-- DTD for polymer sequences, used by the\n"
       "'massXpert' mass spectrometry application.\n"
       "Copyright 2006,2007,2008 Filippo Rusconi - Licensed under the GNU GPL -->\n"
       "<!DOCTYPE polseqdata [\n"
       "<!ELEMENT polseqdata (polchemdef_name,name,code,author,datetime,polseq,le_modif,re_modif,crosslinks,prop*)>\n"
       "<!ATTLIST polseqdata version NMTOKEN #REQUIRED>\n"
       "<!ELEMENT polchemdef_name (#PCDATA)>\n"
       "<!ELEMENT mdf (name,formula,targets)>\n"
       "<!ELEMENT name (#PCDATA)>\n"
       "<!ELEMENT formula (#PCDATA)>\n"
       "<!ELEMENT targets (#PCDATA)>\n"
       "<!ELEMENT code (#PCDATA)>\n"
       "<!ELEMENT author (#PCDATA)>\n"
       "<!ELEMENT datetime (#PCDATA)>\n"
       "<!ELEMENT polseq (codes|monomer)*>\n"
       "<!ELEMENT le_modif (mdf?)>\n"
       "<!ELEMENT re_modif (mdf?)>\n"
       "<!ELEMENT codes (#PCDATA)>\n"
       // <comment> is written for every crosslink, so it has to be
       // declared; it stays optional for documents that lack it.
       "<!ELEMENT crosslink (name,targets,comment?)>\n"
       "<!ELEMENT comment (#PCDATA)>\n"
       "<!ELEMENT crosslinks (crosslink*)>\n"
       "<!ELEMENT monomer (code, mdf*)>\n"
       "<!ELEMENT prop (name, data+)>\n"
       "<!ATTLIST data type (str | int | dbl) \"str\">\n"
       "<!ELEMENT data (#PCDATA)>\n"
       "]>\n"
       );

    return string;
  }


  //! Write the polymer sequence to file.
  /*!  \return true if successful, false otherwise.
   */
  bool
01361   Polymer::writeXmlFile()
  {
    QString *string = 0;
    QString indent("  ");

  
    // We are asked to send an xml description of the polymer sequence.

    QFile file(m_filePath);
  
    if (!file.open(QIODevice::WriteOnly))
      {
      qDebug() << __FILE__ << __LINE__
              << "Failed to open file" << m_filePath
              << "for writing.";
      
      return false;
      }

    QTextStream stream(&file);
    stream.setCodec("UTF-8");
  
    // The DTD
    string =   formatXmlDtd();
    stream << *string;
    delete string;

    // Open the <polseqdata> element.
    //"<!ELEMENT polseqdata(polchemdef_name,name,code,
    //author,datetime,polseq,prop*)>\n"

    stream << QString("<polseqdata version=\"%1\">\n")
      .arg(POL_SEQ_FILE_FORMAT_VERSION);
  
    Q_ASSERT(!mp_polChemDef->name().isEmpty());  
    stream << QString("%1<polchemdef_name>%2</polchemdef_name>\n")
      .arg(indent).arg(mp_polChemDef->name());
  
    stream << QString("%1<name>%2</name>\n")
      .arg(indent).arg(m_name.isEmpty() ? "Not Set" : m_name);
  
    stream << QString("%1<code>%2</code>\n")
      .arg(indent).arg(m_code.isEmpty() ? "Not Set" : m_code);
  
    Q_ASSERT(!m_author.isEmpty());
    stream << QString("%1<author>%2</author>\n")
      .arg(indent).arg(m_author);
  
    m_dateTime = QDateTime::currentDateTime();
    stream << QString("%1<datetime>%2</datetime>\n")
      .arg(indent).arg(dateTime());
  
    string = formatXmlPolSeqElement(1);

    if (string == 0)
      {
      qDebug() << __FILE__ << __LINE__
              << "Failed to produce the <polseq> element string.";
      
      return false;
      }
  
    stream << *string;
    delete string;

 
    // Now deal with the polymer modifications. These are represented as
    // <mdf> elements. Note that as of version 4, <le_modif> and
    // <re_modif> are compulsory after <polseq>.
  
    // Left end modif
    Q_ASSERT(!m_leftEndModif.name().isEmpty());
  
    stream << QString("%1<le_modif>\n")
      .arg(indent);    
  
    if (m_leftEndModif.name() != "NOT_SET")
      {
      string = m_leftEndModif.formatXmlMdfElement(2);

      stream << *string;
      delete string;
      }
  
    stream << QString("%1</le_modif>\n")
      .arg(indent);    
  

    // Right end modif
    Q_ASSERT(!m_rightEndModif.name().isEmpty());
  
    stream << QString("%1<re_modif>\n")
      .arg(indent);    
  
    if (m_rightEndModif.name() != "NOT_SET")
      {
      string = m_rightEndModif.formatXmlMdfElement(2);
  
      stream << *string;
      delete string;
      }
  
    stream << QString("%1</re_modif>\n")
      .arg(indent);    
  
  
    // Now deal with the crossLinks between monomers. These are stored
    // in the m_crossLinkList QList of CrossLink pointers. Note that
    // as of version 4, <crosslinks> is compulsory after
    // <re_modif>.
    string = 0;
    string = formatXmlCrossLinksElement(1);
  
    if (!string)
      {
      qDebug() << __FILE__ << __LINE__
              << "Failed to produce the <crosslinks> element string.";
      
      return false;
      }
  
    stream << *string;
    delete string;

    // Note that at some point, there might be any number of polymer
    // <prop> elements at this place...


    // Finally close the polseqdata.

    stream << QString("</polseqdata>\n");
  
    return true;
  }



  //! Formats a string suitable to use as an XML element.
  /*! Formats a string suitable to be used as an XML element in a
    polymer sequence file. This function generates a string holding all
    the elements pertaining to \p this polymer \em sequence(the list of
    monomers, \em not all the other data). The typical element that is
    generated in this function looks like this:

    \verbatim 
    <polseq>
    <codes>MEFEEDWYGEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGT</codes>
    <monomer>
    <code>S</code>
    <mdf>
    <name>Phosphorylation</name>
    <formula></formula>
    <targets>*</targets>
    </mdf>
    </monomer>
    </polseq>
    \endverbatim  

    Consecutive unmodified monomers are gathered in a single \c codes
    element; each modified monomer gets its own \c monomer element,
    produced by Monomer::formatXmlMonomerElement().

    \param offset times the \p indent string must be used as a lead in the
    formatting of elements.

    \param indent string used to create the leading space that is placed
    at the beginning of indented XML elements inside the XML
    element. Its default value is given in the declaration of this
    function.

    \return a dynamically allocated string that needs to be freed after
    use, or 0 if the element of a modified monomer could not be
    produced.

    \sa writeXmlFile().
  */
  QString *
  Polymer::formatXmlPolSeqElement(int offset, const QString &indent)
  {
    // The children of <polseq> are indented one level deeper than the
    // <polseq> tags themselves.
    const int childOffset = offset + 1;

    // Prepare both leads once: the one of the <polseq> opening and
    // closing tags, and the one of the child elements.
    QString polSeqLead;
    QString childLead;

    for (int iter = 0; iter < childOffset; ++iter)
      {
        if (iter < offset)
          polSeqLead += indent;

        childLead += indent;
      }

    QString *string = new QString();

    *string += polSeqLead + "<polseq>\n";

    // At this point, we have to iterate in the sequence. If the
    // monomers are not modified, then put their codes in a row, like
    // "ETGSH", in a <codes> element. As soon as a monomer is modified,
    // whatever the modification, it and its contents should be listed
    // in a detailed <monomer> element.

    QString codesString;

    for (int iter = 0; iter < m_monomerList.size(); ++iter)
      {
        const Monomer *monomer = m_monomerList.at(iter);
        Q_ASSERT(monomer);

        if (!monomer->isModified())
          {
            // Unmodified monomer: only its code is needed.
            codesString += monomer->code();
            continue;
          }

        // If something was baking in codesString, then we have to
        // create the <codes> element right now, before the <monomer>
        // element, so that the sequence order is preserved.

        if (!codesString.isEmpty())
          {
            *string += childLead + "<codes>" + codesString + "</codes>\n";

            codesString.clear();
          }

        QString *monomerString =
          monomer->formatXmlMonomerElement(childOffset);

        if (!monomerString)
          {
            delete string;

            return 0;
          }

        *string += *monomerString;

        // The monomer element string was allocated for us: free it now
        // that its contents have been copied.
        delete monomerString;
      }

    // If something is still baking in codesString(the sequence ended
    // with unmodified monomers), create the last <codes> element.

    if (!codesString.isEmpty())
      *string += childLead + "<codes>" + codesString + "</codes>\n";

    *string += polSeqLead + "</polseq>\n";

    return string;
  }
  

  //! Formats the \c crosslinks XML element of the polymer.
  /*! Iterates in \c m_crossLinkList and generates one \c crosslink
    element per crossLink, each holding the name of the crossLink, the
    text listing the indices of the monomers involved and the
    comment. The generated element looks like this:

    \verbatim
    <crosslinks>
      <crosslink>
        <name>DisulfideBond</name>
        <targets>;2;6;</targets>
        <comment></comment>
      </crosslink>
    </crosslinks>
    \endverbatim

    The name and the comment are XML-escaped, so that characters like
    '&' or '<' do not produce a malformed document.

    \param offset times the \p indent string must be used as a lead in the
    formatting of elements.

    \param indent string used to create the leading space that is placed
    at the beginning of indented XML elements.

    \return a dynamically allocated string that needs to be freed after
    use.

    \sa writeXmlFile().
  */
  QString *
  Polymer::formatXmlCrossLinksElement(int offset, const QString &indent)
  {
    // Escapes the characters that are reserved in XML element text.
    struct XmlText
    {
      static QString escaped(QString text)
      {
        // '&' has to be handled first, otherwise the entities produced
        // by the other replacements would be escaped a second time.
        text.replace(QString("&"), QString("&amp;"));
        text.replace(QString("<"), QString("&lt;"));
        text.replace(QString(">"), QString("&gt;"));

        return text;
      }
    };

    // Three nesting levels are involved: <crosslinks>, its <crosslink>
    // children and the data elements inside each <crosslink>. The leads
    // do not depend on the crossLink, so they are prepared only once.
    QString crossLinksLead;
    QString crossLinkLead;
    QString dataLead;

    for (int iter = 0; iter < offset + 2; ++iter)
      {
        if (iter < offset)
          crossLinksLead += indent;

        if (iter < offset + 1)
          crossLinkLead += indent;

        dataLead += indent;
      }

    QString *string = new QString();

    *string += crossLinksLead + "<crosslinks>\n";

    for (int jter = 0; jter < m_crossLinkList.size(); ++jter)
      {
        CrossLink *crossLink = m_crossLinkList.at(jter);
        Q_ASSERT(crossLink);

        *string += crossLinkLead + "<crosslink>\n";

        *string += dataLead + "<name>"
          + XmlText::escaped(crossLink->name()) + "</name>\n";

        // The string with all the monomer indices(which are the
        // targets of the crossLink).
        *string += dataLead + "<targets>"
          + crossLink->monomerIndexText() + "</targets>\n";

        *string += dataLead + "<comment>"
          + XmlText::escaped(crossLink->comment()) + "</comment>\n";

        *string += crossLinkLead + "</crosslink>\n";
      }

    *string += crossLinksLead + "</crosslinks>\n";

    return string;
  }
                       


  //! Validates the polymer.
  /*! The work is delegated to Sequence::validate(), which is handed
    the \c mp_polChemDef member of this polymer.

    \return the result of Sequence::validate().
  */
  bool
  Polymer::validate()
  {
    const bool isValid = Sequence::validate(mp_polChemDef);

    return isValid;
  }


  //! Outputs a string to the standard error console.
  /*! Outputs a string describing the polymer. Used for debugging
    purposes.
  */
  void 
01774   Polymer::debugPutStdErr()
  {
    qDebug() << __FILE__ << __LINE__;
    qDebug() << m_name.toAscii() << m_code.toAscii();
    qDebug() << mp_polChemDef->name().toAscii();
    qDebug() << m_author.toAscii() << m_filePath.toAscii();
    qDebug() << m_leftEndModif.name().toAscii() 
            << m_rightEndModif.name().toAscii();


    for (int iter = 0 ; iter < m_monomerList.size(); ++iter)
      {
      qDebug() << m_monomerList.at(iter)->code().toAscii();
      }
  }

} // namespace massXpert

Generated by  Doxygen 1.6.0   Back to index