|
MASA-Core
|
00001 /******************************************************************************* 00002 * 00003 * Copyright (c) 2010-2015 Edans Sandes 00004 * 00005 * This file is part of MASA-Core. 00006 * 00007 * MASA-Core is free software: you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation, either version 3 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * MASA-Core is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with MASA-Core. If not, see <http://www.gnu.org/licenses/>. 00019 * 00020 ******************************************************************************/ 00021 00022 #ifndef ALIGNERMANAGER_HPP_ 00023 #define ALIGNERMANAGER_HPP_ 00024 00025 #include "../libmasa/libmasa.hpp" 00026 00027 //#include "buffer/Buffer.hpp" 00028 //#include "BlocksFile.hpp" 00029 #include "biology/Sequence.hpp" 00030 #include "io/CellsReader.hpp" 00031 #include "io/CellsWriter.hpp" 00032 #include "sra/SpecialRowsPartition.hpp" 00033 #include "BlocksFile.hpp" 00034 #include "BestScoreList.hpp" 00035 #include "Job.hpp" 00036 00037 typedef void (*callback_f)(int i, int j, int len, cell_t* data); 00038 typedef void (*callback_score_f)(score_t score, int bx, int by); 00039 00040 00041 class AlignerManager : public IManager { 00042 public: 00043 AlignerManager(IAligner* aligner); 00044 virtual ~AlignerManager(); 00045 00046 void alignPartition(Partition partition, int startType); 00047 00048 /** 00049 * Defines the partition that must be processed. 00050 * @param i0 start row 00051 * @param j0 start column 00052 * @param i1 end row (exclusive) 00053 * @param j1 end column (exclusive) 00054 * @param start_type The start type of the partition. See AbstractAligner::startType. 00055 */ 00056 //void setPartition(Partition partition, int start_type); 00057 00058 /** 00059 * Defines the sequences and the range of the sequences 00060 * that will be aligned. This method also defines the range of 00061 * the sequences that will be used in the alignPartitions. Note 00062 * that the [i0,i1) and [j0,j1) are not the partition itself, but 00063 * all the aligned partitions will reside inside this range. If the 00064 * parameters $i0$ and $j0$ are positive, then the sequence data 00065 * passed to the aligner is shifted, starting in positions $i0$ and $j0$. 00066 * This allows the reduction of memory consumption, trimming the 00067 * prefix of the sequences that would never be used (i.e. 00068 * the memory may be allocated solely for the given sequence ranges.). 00069 * 00070 * @param seq0 vertical sequence object. 00071 * @param seq1 horizontal sequence object. 00072 * @param i0 lowest position in seq0 that may be aligned. 00073 * @param j0 lowest position in seq1 that may be aligned. 00074 * @param i1 highest position in seq0 that may be aligned. 00075 * @param j1 highest position in seq1 that may be aligned. 00076 * @param stats the statistics log file 00077 */ 00078 void setSequences(Sequence* seq0, Sequence* seq1, int i0, int j0, int i1, int j1, FILE* stats=NULL); 00079 00080 /** 00081 * Clear all sequence strucutures. 00082 */ 00083 void unsetSequences(); 00084 00085 /** 00086 * Defines that the first column must be initialized with a pre-defined 00087 * column. 00088 * 00089 * @param firstColumnGapped If true, than the cells must be initialized 00090 * considering gaps, otherwise the cells must be initialized with zeros. 00091 * See AbstractAligner::getFirstColumnInitType for the initialization 00092 * functions. 00093 */ 00094 //void setFirstColumnSource(int firstColumnInitType); 00095 00096 /** 00097 * Defines the recurrence type of the execution. 00098 * @param recurrenceType can be SMITH_WATERMAN or NEEDLEMAN_WUNSCH. 00099 */ 00100 void setRecurrenceType(int recurrenceType); 00101 00102 /** 00103 * Defines if the aligner may do the block pruning optimization. Since there 00104 * are situations that the block pruning is not permitted, so the aligner 00105 * must respect when blockPruning is false. 00106 * 00107 * @param blockPruning true if the aligner may prune blocks. 00108 */ 00109 void setBlockPruning(bool blockPruning); 00110 00111 /** 00112 * Defines the grid dimension of the grid, if 00113 * capabilities_t::dispatch_block_scores is SUPPORTED. 00114 * 00115 * @param width the width of the grid. 00116 * @param height the height of the grid. 00117 */ 00118 //void setGridDimensions(const int width, const int height); 00119 00120 /** 00121 * Defines the partition where the special rows will be stored. 00122 * @param specialRowsPartition the special row partition. 00123 */ 00124 void setSpecialRowsPartition(SpecialRowsPartition* specialRowsPartition); 00125 00126 /** 00127 * Defines the list to store the best scores. 00128 * @param bestScoreList the list to store the best scores. 00129 * @param bestScoreLocation where to check for best score. 00130 */ 00131 void setBestScoreList(BestScoreList* bestScoreList, const int bestScoreLocation = AT_NOWHERE); 00132 00133 /** 00134 * Whenever the aligner finds the goal score, the processing must stop. 00135 */ 00136 void setGoalScore(int goalScore, const int goalScoreLocation = AT_NOWHERE); 00137 00138 /** 00139 * Defines the previous special rows partition containing the rows to 00140 * be matched against the last column/rows. 00141 */ 00142 void setLastColumnReader(SeekableCellsReader* lastColumnReader); 00143 00144 /** 00145 * Defines the previous special rows partition containing the rows to 00146 * be matched against the last column/rows. 00147 */ 00148 void setLastRowReader(SeekableCellsReader* lastRowReader); 00149 00150 /** 00151 * Do not find best score 00152 */ 00153 void unsetGoalScore(); 00154 00155 /** 00156 * Defines the file to store the best score of each block. 00157 * @param blocksFile the file to store the scores. 00158 */ 00159 void setBlocksFile(BlocksFile* blocksFile); 00160 00161 /** 00162 * Defines the minimum distance (in rows) between two special rows. 00163 * 00164 * @param specialRowInterval the minimum interval between special rows. 00165 */ 00166 void setSpecialRowInterval(const int specialRowInterval); 00167 00168 /** 00169 * Defines the variable penalty functions to be aligned. 00170 * 00171 * @param match Match score 00172 * @param mismatch Mismatch score 00173 * @param gapOpen Gap opening penalty 00174 * @param gapExtension Gap extension penalty 00175 */ 00176 void setPenalties(const int match, const int mismatch, const int gapOpen, const int gapExtension); 00177 00178 /** 00179 * Sets the super-partition being aligned to be returned by the 00180 * IManager::getSuperPartition() method. 00181 * 00182 * @param superPartition the super partition to be set. 00183 * @see IManager::getSuperPartition() 00184 */ 00185 void setSuperPartition(Partition superPartition); 00186 00187 /** 00188 * Resets the super-partition. A reseted super-partition means that 00189 * it will always be equal to the current aligned partition. 00190 */ 00191 void unsetSuperPartition(); 00192 00193 /* ********************* * 00194 * Callback functions * 00195 * ********************* */ 00196 00197 /** 00198 * Defines the callback function that will be called whenever the 00199 * dispatchRow method is called for the last row. 00200 * 00201 * @param processLastRowFunction the callback function. 00202 */ 00203 //void setProcessLastRowFunction(callback_f processLastRowFunction); 00204 00205 /** 00206 * Defines the callback function that will be called whenever the 00207 * dispatchColumn method is called for the last column. 00208 * 00209 * @param processLastColumnFunction the callback function. 00210 */ 00211 //void setProcessLastColumnFunction(callback_f processLastColumnFunction); 00212 00213 /** 00214 * Defines the callback function that will be called whenever the 00215 * dispatchScore method is called. 00216 * 00217 * @param processBlockFunction the callback function. 00218 */ 00219 //void setProcessScoreFunction(callback_score_f processBlockFunction); 00220 00221 /** 00222 * Defines the callback function that will be called whenever the 00223 * dispatchScore method is called for the last cell (right-bottom most cell). 00224 * 00225 * @param processLastCellFunction the callback function. 00226 */ 00227 //void setProcessLastCellFunction(callback_score_f processLastCellFunction); 00228 00229 /** 00230 * Return the next crosspoint found by the matching procedure or by the 00231 * goal score. If it was not found, the returned crosspoint score is -INF. 00232 */ 00233 const crosspoint_t getNextCrosspoint() const; 00234 00235 /** 00236 * Return true if the next crosspoint was found. 00237 */ 00238 bool isFoundCrosspoint() const; 00239 00240 /* Implementing IManager methods */ 00241 00242 /* Get methods */ 00243 int getRecurrenceType() const; 00244 int getSpecialRowInterval() const; 00245 int getSpecialColumnInterval() const; 00246 int getFirstColumnInitType(); 00247 int getFirstRowInitType(); 00248 Partition getSuperPartition(); 00249 00250 /* Receive Methods */ 00251 void receiveFirstRow(cell_t* buffer, int len); 00252 void receiveFirstColumn(cell_t* buffer, int len); 00253 00254 /* Dispatch Methods */ 00255 void dispatchColumn(int j, const cell_t* buffer, int len); 00256 void dispatchRow(int i, const cell_t* buffer, int len); 00257 void dispatchScore(score_t score, int bx=-1, int by=-1); 00258 00259 /* Must Methods */ 00260 bool mustContinue(); 00261 bool mustDispatchLastCell(); 00262 bool mustDispatchLastRow(); 00263 bool mustDispatchLastColumn(); 00264 bool mustDispatchSpecialRows(); 00265 bool mustDispatchSpecialColumns(); 00266 bool mustDispatchScores(); 00267 bool mustPruneBlocks(); 00268 score_t getBestScoreLastColumn() const; 00269 score_t getBestScoreLastRow() const; 00270 00271 private: 00272 bool active; 00273 00274 /** The aligner object that executes the SW computation */ 00275 IAligner* aligner; 00276 00277 /** The partition that is being aligned */ 00278 Partition partition; 00279 00280 /** 00281 * The reader that provides the last column saved in memory/disk. 00282 */ 00283 SeekableCellsReader* lastColumnReader; 00284 00285 /** 00286 * The reader that provides the last rows saved in memory/disk. 00287 */ 00288 SeekableCellsReader* lastRowReader; 00289 00290 /** 00291 * Vector that store the temporary cells for the matching procedures. 00292 */ 00293 cell_t* baseColumn; 00294 00295 /** 00296 * Vector that store the temporary cells for the matching procedures. 00297 */ 00298 cell_t* baseRow; 00299 00300 /** Math/mismatch/gaps parameters */ 00301 const score_params_t* score_params; 00302 00303 /** 00304 * The start type of the partition. Possible values are: TYPE_MATCH, 00305 * TYPE_GAP_1 or TYPE_GAP_2. When set to the TYPE_GAP_1 or TYPE_GAP_2, 00306 * the initialization of the first row/column must be done without the 00307 * gap opening penalty. 00308 */ 00309 int startType; 00310 00311 /** Match Score */ 00312 int match; 00313 00314 /** Mismatch Score */ 00315 int mismatch; 00316 00317 /** Gap opening penalty */ 00318 int gapOpen; 00319 00320 /** Gap extension penalty */ 00321 int gapExtension; 00322 00323 /** First column blocking buffer */ 00324 SeekableCellsReader* firstColumnReader; 00325 00326 /** First row blocking buffer */ 00327 SeekableCellsReader* firstRowReader; 00328 00329 /** Last column blocking buffer */ 00330 CellsWriter* lastColumnWriter; 00331 00332 /** Last row blocking buffer */ 00333 CellsWriter* lastRowWriter; 00334 00335 /** Partition where the Special Rows are stored */ 00336 SpecialRowsPartition* specialRowsPartition; 00337 00338 /** List with the best scores */ 00339 BestScoreList* bestScoreList; 00340 00341 /** The aligner must stop whenever it finds the goal score. */ 00342 int goalScore; 00343 00344 /** Where to check the goal score */ 00345 int goalScoreLocation; 00346 00347 /** Indicates if the next crosspoint has already been found. */ 00348 bool foundCrosspoint; 00349 00350 /** The crosspoint found by the matching procedure or the goal score */ 00351 crosspoint_t nextCrosspoint; 00352 00353 /* Stores the score of each block */ 00354 BlocksFile* blocksFile; 00355 00356 /** Where to check best score */ 00357 int bestScoreLocation; 00358 00359 /** Column tracking position for the dispatching procedure */ 00360 int lastColumnPos; 00361 00362 /** Row tracking position for the dispatching procedure */ 00363 int lastRowPos; 00364 00365 /** Callback for the last rows */ 00366 //callback_f processLastRowFunction; 00367 00368 /** Callback for the last cells */ 00369 //callback_score_f processLastCellFunction; 00370 00371 /** Callback for the scores */ 00372 //callback_score_f processScoreFunction; 00373 00374 /** Callback for the last score */ 00375 //callback_f processLastColumnFunction; 00376 00377 /** true if block must be pruned */ 00378 int blockPruning; 00379 00380 /** required recurrence type (SMITH_WATERMAN or NEEDLEMAN_WUNSCH) */ 00381 int recurrenceType; 00382 00383 /** minimum distance between two special rows */ 00384 int specialRowInterval; 00385 00386 /** File where the first row is stored */ 00387 FILE* firstRowFile; 00388 00389 /** First row tracking position for the receive procedure */ 00390 //int firstRowPos; 00391 00392 /** First column tracking position for the receive procedure */ 00393 //int firstColumnPos; 00394 00395 int firstColumnInitType; 00396 00397 /** true if first row is gapped */ 00398 //bool firstRowGapped; 00399 00400 int firstRowInitType; 00401 00402 /** true if first columns is gapped */ 00403 //bool firstColumnGapped; 00404 00405 score_t bestScoreLastColumn; 00406 score_t bestScoreLastRow; 00407 00408 /* 00409 * If the setSequence method defines that the sequence may only be aligned 00410 * after some position, then the aligner receive a trimmed sequence and 00411 * its partition coordinates are relative to the start of the trimmed 00412 * sequences. This reduces the amount of memory used by the aligner. 00413 * The seq0_offset and seq1_offset indicates how many nucleotides 00414 * were trimmed in each sequence. 00415 */ 00416 /** defines how many nucleotides were trimmed from the sequence 0 */ 00417 int seq0_offset; 00418 /** defines how many nucleotides were trimmed from the sequence 1 */ 00419 int seq1_offset; 00420 00421 /** 00422 * Partition that holds all sub-partitions. If there are no 00423 * sub-partition, this is the single partition being aligned. 00424 */ 00425 Partition superPartition; 00426 00427 /** 00428 * Stops the execution of the aligner. This makes the 00429 * mustContinue() method to return false. 00430 */ 00431 void stopAligner(); 00432 00433 00434 int findBestCell(const cell_t* buffer, int len); 00435 match_result_t findGoalCell(const cell_t* buffer, cell_t* base, int len, CellsReader* cellsReader); 00436 match_result_t findFullGap(int len, bool openGap, SeekableCellsReader* cellsReader); 00437 00438 /** 00439 * Defines that the first column must be initialized with a customized 00440 * column. The column is loaded from a Blocking Buffer, so consider that 00441 * the buffer will block the process if the requested data is not 00442 * fully ready. So, the buffer must be read in chunks, in order to not 00443 * block the entire execution. 00444 * 00445 * @param firstColumnBuffer the blocking buffer that will contain the 00446 * first column data. 00447 */ 00448 void setFirstColumnSource(SeekableCellsReader* firstColumnReader); 00449 00450 /** 00451 * Defines that the first row must be initialized with a pre-defined 00452 * row. 00453 * 00454 * @param firstRowGapped If true, than the cells must be initialized 00455 * considering gaps, otherwise the cells must be initialized with zeros. 00456 * See AbstractAligner::getFirstRowInitType for the initialization 00457 * functions. 00458 */ 00459 //void setFirstRowSource(int firstRowInitType); 00460 00461 /** 00462 * Defines that the first row must be initialized with a customized 00463 * row. The column is loaded from a FILE. 00464 * 00465 * @param firstRow the file that contains the first column data. 00466 */ 00467 void setFirstRowSource(SeekableCellsReader* firstColumnReader); 00468 00469 /** 00470 * Defines the destination of the last column. The column is stored 00471 * in a Blocking Buffer, so consider that the buffer will block the 00472 * process if the buffer is full. 00473 00474 * @param lastColumnBuffer the buffer that will receive the data of 00475 * the last column. 00476 */ 00477 void setLastColumnDestination(CellsWriter* lastColumnWriter); 00478 00479 00480 00481 void setLastRowDestination(CellsWriter* lastRowWriter); 00482 00483 }; 00484 00485 #endif /* ALIGNERMANAGER_HPP_ */
1.7.6.1