MASA-Core
AlignerManager.hpp
Go to the documentation of this file.
00001 /*******************************************************************************
00002  *
00003  * Copyright (c) 2010-2015   Edans Sandes
00004  *
00005  * This file is part of MASA-Core.
00006  * 
00007  * MASA-Core is free software: you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation, either version 3 of the License, or
00010  * (at your option) any later version.
00011  * 
00012  * MASA-Core is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with MASA-Core.  If not, see <http://www.gnu.org/licenses/>.
00019  *
00020  ******************************************************************************/
00021 
00022 #ifndef ALIGNERMANAGER_HPP_
00023 #define ALIGNERMANAGER_HPP_
00024 
00025 #include "../libmasa/libmasa.hpp"
00026 
00027 //#include "buffer/Buffer.hpp"
00028 //#include "BlocksFile.hpp"
00029 #include "biology/Sequence.hpp"
00030 #include "io/CellsReader.hpp"
00031 #include "io/CellsWriter.hpp"
00032 #include "sra/SpecialRowsPartition.hpp"
00033 #include "BlocksFile.hpp"
00034 #include "BestScoreList.hpp"
00035 #include "Job.hpp"
00036 /* Callback pointer types; in this header they are referenced only by the commented-out setProcess*Function declarations and members of AlignerManager. */
00037 typedef void (*callback_f)(int i, int j, int len, cell_t* data);
00038 typedef void (*callback_score_f)(score_t score, int bx, int by);
00039 
00040 /** IManager implementation bound to an IAligner: declares the alignment configuration setters and the receive/dispatch/must methods. */
00041 class AlignerManager : public IManager {
00042 public:
00043         AlignerManager(IAligner* aligner);
00044         virtual ~AlignerManager();
00045         /** Aligns the given partition; startType is the start type of the partition (see the startType member). */
00046         void alignPartition(Partition partition, int startType);
00047 
00048         /**
00049          * Defines the partition that must be processed.
00050          * @param i0    start row
00051          * @param j0    start column
00052          * @param i1    end row (exclusive)
00053          * @param j1    end column (exclusive)
00054          * @param start_type The start type of the partition. See AbstractAligner::startType.
00055          */
00056         //void setPartition(Partition partition, int start_type);
00057 
00058         /**
00059          * Defines the sequences and the range of the sequences
00060          * that will be aligned. This method also defines the range of
00061          * the sequences that will be used in the alignPartitions. Note
00062          * that the [i0,i1) and [j0,j1) are not the partition itself, but
00063          * all the aligned partitions will reside inside this range. If the
00064          * parameters $i0$ and $j0$ are positive, then the sequence data
00065          * passed to the aligner is shifted, starting in positions $i0$ and $j0$.
00066          * This allows the reduction of memory consumption, trimming the
00067          * prefix of the sequences that would never be used (i.e.
00068          * the memory may be allocated solely for the given sequence ranges.).
00069          *
00070          * @param seq0  vertical sequence object.
00071          * @param seq1  horizontal sequence object.
00072          * @param i0    lowest position in seq0 that may be aligned.
00073          * @param j0    lowest position in seq1 that may be aligned.
00074          * @param i1    highest position in seq0 that may be aligned.
00075          * @param j1    highest position in seq1 that may be aligned.
00076          * @param stats the statistics log file
00077          */
00078         void setSequences(Sequence* seq0, Sequence* seq1, int i0, int j0, int i1, int j1, FILE* stats=NULL);
00079 
00080         /**
00081          * Clears all sequence structures.
00082          */
00083         void unsetSequences();
00084 
00085         /**
00086          * Defines that the first column must be initialized with a pre-defined
00087          * column.
00088          *
00089          * @param firstColumnGapped If true, then the cells must be initialized
00090          * considering gaps, otherwise the cells must be initialized with zeros.
00091          * See AbstractAligner::getFirstColumnInitType for the initialization
00092          * functions.
00093          */
00094         //void setFirstColumnSource(int firstColumnInitType);
00095 
00096         /**
00097          * Defines the recurrence type of the execution.
00098          * @param recurrenceType can be SMITH_WATERMAN or NEEDLEMAN_WUNSCH.
00099          */
00100         void setRecurrenceType(int recurrenceType);
00101 
00102         /**
00103          * Defines whether the aligner may perform the block pruning optimization.
00104          * There are situations in which block pruning is not permitted, so the
00105          * aligner must respect a blockPruning value of false.
00106          *
00107          * @param blockPruning true if the aligner may prune blocks.
00108          */
00109         void setBlockPruning(bool blockPruning);
00110 
00111         /**
00112          * Defines the dimensions of the grid, if
00113          * capabilities_t::dispatch_block_scores is SUPPORTED.
00114          *
00115          * @param width the width of the grid.
00116          * @param height the height of the grid.
00117          */
00118         //void setGridDimensions(const int width, const int height);
00119 
00120         /**
00121          * Defines the partition where the special rows will be stored.
00122          * @param specialRowsPartition the special row partition.
00123          */
00124         void setSpecialRowsPartition(SpecialRowsPartition* specialRowsPartition);
00125 
00126         /**
00127          * Defines the list to store the best scores.
00128          * @param bestScoreList the list to store the best scores.
00129          * @param bestScoreLocation where to check for best score.
00130          */
00131         void setBestScoreList(BestScoreList* bestScoreList, const int bestScoreLocation = AT_NOWHERE);
00132 
00133         /**
00134          * Whenever the aligner finds the goal score, the processing must stop.
00135          */
00136         void setGoalScore(int goalScore, const int goalScoreLocation = AT_NOWHERE);
00137 
00138         /**
00139          * Defines the previous special rows partition containing the rows to
00140          * be matched against the last column/rows.
00141          */
00142         void setLastColumnReader(SeekableCellsReader* lastColumnReader);
00143 
00144         /**
00145          * Defines the previous special rows partition containing the rows to
00146          * be matched against the last column/rows.
00147          */
00148         void setLastRowReader(SeekableCellsReader* lastRowReader);
00149 
00150         /**
00151          * Do not find best score
00152          */
00153         void unsetGoalScore();
00154 
00155         /**
00156          * Defines the file to store the best score of each block.
00157          * @param blocksFile the file to store the scores.
00158          */
00159         void setBlocksFile(BlocksFile* blocksFile);
00160 
00161         /**
00162          * Defines the minimum distance (in rows) between two special rows.
00163          *
00164          * @param specialRowInterval the minimum interval between special rows.
00165          */
00166         void setSpecialRowInterval(const int specialRowInterval);
00167 
00168         /**
00169          * Defines the match/mismatch scores and the gap penalties of the alignment.
00170          *
00171          * @param match         Match score
00172          * @param mismatch      Mismatch score
00173          * @param gapOpen       Gap opening penalty
00174          * @param gapExtension  Gap extension penalty
00175          */
00176         void setPenalties(const int match, const int mismatch, const int gapOpen, const int gapExtension);
00177 
00178         /**
00179          * Sets the super-partition being aligned to be returned by the
00180          * IManager::getSuperPartition() method.
00181          *
00182          * @param superPartition the super partition to be set.
00183          * @see IManager::getSuperPartition()
00184          */
00185         void setSuperPartition(Partition superPartition);
00186 
00187         /**
00188          * Resets the super-partition. A reset super-partition means that
00189          * it will always be equal to the current aligned partition.
00190          */
00191         void unsetSuperPartition();
00192 
00193         /* ********************* *
00194          *  Callback functions   *
00195          * ********************* */
00196 
00197         /**
00198          * Defines the callback function that will be called whenever the
00199          * dispatchRow method is called for the last row.
00200          *
00201          * @param processLastRowFunction the callback function.
00202          */
00203         //void setProcessLastRowFunction(callback_f processLastRowFunction);
00204 
00205         /**
00206          * Defines the callback function that will be called whenever the
00207          * dispatchColumn method is called for the last column.
00208          *
00209          * @param processLastColumnFunction the callback function.
00210          */
00211         //void setProcessLastColumnFunction(callback_f processLastColumnFunction);
00212 
00213         /**
00214          * Defines the callback function that will be called whenever the
00215          * dispatchScore method is called.
00216          *
00217          * @param processBlockFunction the callback function.
00218          */
00219         //void setProcessScoreFunction(callback_score_f processBlockFunction);
00220 
00221         /**
00222          * Defines the callback function that will be called whenever the
00223          * dispatchScore method is called for the last cell (right-bottom most cell).
00224          *
00225          * @param processLastCellFunction the callback function.
00226          */
00227         //void setProcessLastCellFunction(callback_score_f processLastCellFunction);
00228 
00229         /**
00230          * Returns the next crosspoint found by the matching procedure or by the
00231          * goal score. If it was not found, the returned crosspoint score is -INF.
00232          */
00233         const crosspoint_t getNextCrosspoint() const;
00234 
00235         /**
00236          * Returns true if the next crosspoint was found.
00237          */
00238         bool isFoundCrosspoint() const;
00239 
00240         /* Implementing IManager methods */
00241 
00242         /* Get methods */
00243         int getRecurrenceType() const;
00244         int getSpecialRowInterval() const;
00245         int getSpecialColumnInterval() const;
00246         int getFirstColumnInitType();
00247         int getFirstRowInitType();
00248         Partition getSuperPartition();
00249 
00250         /* Receive Methods */
00251         void receiveFirstRow(cell_t* buffer, int len);
00252         void receiveFirstColumn(cell_t* buffer, int len);
00253 
00254         /* Dispatch Methods */
00255         void dispatchColumn(int j, const cell_t* buffer, int len);
00256         void dispatchRow(int i, const cell_t* buffer, int len);
00257         void dispatchScore(score_t score, int bx=-1, int by=-1);
00258 
00259         /* Must Methods */
00260         bool mustContinue();
00261         bool mustDispatchLastCell();
00262         bool mustDispatchLastRow();
00263         bool mustDispatchLastColumn();
00264         bool mustDispatchSpecialRows();
00265         bool mustDispatchSpecialColumns();
00266         bool mustDispatchScores();
00267         bool mustPruneBlocks();
00268         score_t getBestScoreLastColumn() const;
00269         score_t getBestScoreLastRow() const;
00270 
00271 private:
00272         bool active;
00273 
00274         /** The aligner object that executes the SW computation */
00275         IAligner* aligner;
00276 
00277         /** The partition that is being aligned */
00278         Partition partition;
00279 
00280         /**
00281          * The reader that provides the last column saved in memory/disk.
00282          */
00283         SeekableCellsReader* lastColumnReader;
00284 
00285         /**
00286          * The reader that provides the last rows saved in memory/disk.
00287          */
00288         SeekableCellsReader* lastRowReader;
00289 
00290         /**
00291          * Vector that stores the temporary cells for the matching procedures.
00292          */
00293         cell_t* baseColumn;
00294 
00295         /**
00296          * Vector that stores the temporary cells for the matching procedures.
00297          */
00298         cell_t* baseRow;
00299 
00300         /** Match/mismatch/gap parameters */
00301         const score_params_t* score_params;
00302 
00303         /**
00304          * The start type of the partition. Possible values are: TYPE_MATCH,
00305          * TYPE_GAP_1 or TYPE_GAP_2. When set to the TYPE_GAP_1 or TYPE_GAP_2,
00306          * the initialization of the first row/column must be done without the
00307          * gap opening penalty.
00308          */
00309         int startType;
00310 
00311         /** Match Score */
00312         int match;
00313 
00314         /** Mismatch Score */
00315         int mismatch;
00316 
00317         /** Gap opening penalty */
00318         int gapOpen;
00319 
00320         /** Gap extension penalty */
00321         int gapExtension;
00322 
00323         /** First column blocking buffer */
00324         SeekableCellsReader* firstColumnReader;
00325 
00326         /** First row blocking buffer */
00327         SeekableCellsReader* firstRowReader;
00328 
00329         /** Last column blocking buffer */
00330         CellsWriter* lastColumnWriter;
00331 
00332         /** Last row blocking buffer */
00333         CellsWriter* lastRowWriter;
00334 
00335         /** Partition where the Special Rows are stored */
00336         SpecialRowsPartition* specialRowsPartition;
00337 
00338         /** List with the best scores */
00339         BestScoreList* bestScoreList;
00340 
00341         /** The aligner must stop whenever it finds the goal score. */
00342         int goalScore;
00343 
00344         /** Where to check the goal score */
00345         int goalScoreLocation;
00346 
00347         /** Indicates if the next crosspoint has already been found. */
00348         bool foundCrosspoint;
00349 
00350         /** The crosspoint found by the matching procedure or the goal score */
00351         crosspoint_t nextCrosspoint;
00352 
00353         /** Stores the score of each block */
00354         BlocksFile* blocksFile;
00355 
00356         /** Where to check best score */
00357         int bestScoreLocation;
00358 
00359         /** Column tracking position for the dispatching procedure */
00360         int lastColumnPos;
00361 
00362         /** Row tracking position for the dispatching procedure */
00363         int lastRowPos;
00364 
00365         /** Callback for the last rows */
00366         //callback_f processLastRowFunction;
00367 
00368         /** Callback for the last cells */
00369         //callback_score_f processLastCellFunction;
00370 
00371         /** Callback for the scores */
00372         //callback_score_f processScoreFunction;
00373 
00374         /** Callback for the last score */
00375         //callback_f processLastColumnFunction;
00376 
00377         /** true if block must be pruned. NOTE(review): declared as int although setBlockPruning() takes a bool. */
00378         int blockPruning;
00379 
00380         /** required recurrence type (SMITH_WATERMAN or NEEDLEMAN_WUNSCH) */
00381         int recurrenceType;
00382 
00383         /** minimum distance between two special rows */
00384         int specialRowInterval;
00385 
00386         /** File where the first row is stored */
00387         FILE* firstRowFile;
00388 
00389         /** First row tracking position for the receive procedure */
00390         //int firstRowPos;
00391 
00392         /** First column tracking position for the receive procedure */
00393         //int firstColumnPos;
00394 
00395         int firstColumnInitType;
00396 
00397         /** true if first row is gapped */
00398         //bool firstRowGapped;
00399 
00400         int firstRowInitType;
00401 
00402         /** true if first column is gapped */
00403         //bool firstColumnGapped;
00404 
00405         score_t bestScoreLastColumn;
00406         score_t bestScoreLastRow;
00407 
00408         /*
00409          * If the setSequences method defines that the sequence may only be aligned
00410          * after some position, then the aligner receives a trimmed sequence and
00411          * its partition coordinates are relative to the start of the trimmed
00412          * sequences. This reduces the amount of memory used by the aligner.
00413          * The seq0_offset and seq1_offset indicate how many nucleotides
00414          * were trimmed in each sequence.
00415          */
00416         /** defines how many nucleotides were trimmed from the sequence 0 */
00417         int seq0_offset;
00418         /** defines how many nucleotides were trimmed from the sequence 1 */
00419         int seq1_offset;
00420 
00421         /**
00422          * Partition that holds all sub-partitions. If there are no
00423          * sub-partition, this is the single partition being aligned.
00424          */
00425         Partition superPartition;
00426 
00427         /**
00428          * Stops the execution of the aligner. This makes the
00429          * mustContinue() method return false.
00430          */
00431         void stopAligner();
00432 
00433 
00434         int findBestCell(const cell_t* buffer, int len);
00435         match_result_t findGoalCell(const cell_t* buffer, cell_t* base, int len, CellsReader* cellsReader);
00436         match_result_t findFullGap(int len, bool openGap, SeekableCellsReader* cellsReader);
00437 
00438         /**
00439          * Defines that the first column must be initialized with a customized
00440          * column. The column is loaded from a Blocking Buffer, so consider that
00441          * the buffer will block the process if the requested data is not
00442          * fully ready. So, the buffer must be read in chunks, in order to not
00443          * block the entire execution.
00444          *
00445          * @param firstColumnReader the blocking buffer that will contain the
00446          * first column data.
00447          */
00448         void setFirstColumnSource(SeekableCellsReader* firstColumnReader);
00449 
00450         /**
00451          * Defines that the first row must be initialized with a pre-defined
00452          * row.
00453          *
00454          * @param firstRowGapped If true, then the cells must be initialized
00455          * considering gaps, otherwise the cells must be initialized with zeros.
00456          * See AbstractAligner::getFirstRowInitType for the initialization
00457          * functions.
00458          */
00459         //void setFirstRowSource(int firstRowInitType);
00460 
00461         /**
00462          * Defines that the first row must be initialized with a customized
00463          * row. The row is provided through a SeekableCellsReader.
00464          *
00465          * @param firstColumnReader the reader that provides the first row data (name as declared).
00466          */
00467         void setFirstRowSource(SeekableCellsReader* firstColumnReader);
00468 
00469         /**
00470          * Defines the destination of the last column. The column is stored
00471          * in a Blocking Buffer, so consider that the buffer will block the
00472          * process if the buffer is full.
00473          *
00474          * @param lastColumnWriter the writer that will receive the data of
00475          *      the last column.
00476          */
00477         void setLastColumnDestination(CellsWriter* lastColumnWriter);
00478 
00479         /** Defines the destination of the last row (row counterpart of setLastColumnDestination).
00480          * @param lastRowWriter the writer that will receive the data of the last row. */
00481         void setLastRowDestination(CellsWriter* lastRowWriter);
00482 
00483 };
00484 
00485 #endif /* ALIGNERMANAGER_HPP_ */