MASA-Core
SpecialRowsPartition.cpp
Go to the documentation of this file.
00001 /*******************************************************************************
00002  *
00003  * Copyright (c) 2010-2015   Edans Sandes
00004  *
00005  * This file is part of MASA-Core.
00006  * 
00007  * MASA-Core is free software: you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation, either version 3 of the License, or
00010  * (at your option) any later version.
00011  * 
00012  * MASA-Core is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with MASA-Core.  If not, see <http://www.gnu.org/licenses/>.
00019  *
00020  ******************************************************************************/
00021 
00022 #include "SpecialRowsPartition.hpp"
00023 #include "SpecialRowFile.hpp"
00024 #include "SpecialRowRAM.hpp"
00025 #include "FirstRow.hpp"
00026 #include "../io/FileCellsWriter.hpp"
00027 #include "../io/FileCellsReader.hpp"
00028 #include "../io/InitialCellsReader.hpp"
00029 #include "../io/TeeCellsReader.hpp"
00030 #include "../io/FileStream.hpp"
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include <string.h>
00035 #include <sstream>
00036 
00037 #include <algorithm>
00038 using namespace std;
00039 
00040 #include <dirent.h>
00041 #include <sys/stat.h>
00042 #include <errno.h>
00043 
// Compile-time switch for the diagnostic printf calls guarded by `if (DEBUG)`.
#define DEBUG (0)

// Turns its argument into a string literal. The `#` operator does not
// macro-expand the argument, so the identifier is stringified as written.
#define def2str(x) #x
00047 
00048 SpecialRowsPartition::SpecialRowsPartition(string _path, int _i0, int _j0, int _i1, int _j1, bool _readOnly, const score_params_t* score_params)
00049         : path(_path), i0(_i0), j0(_j0), i1(_i1), j1(_j1), readOnly(_readOnly) {
00050 
00051         this->readingRow = NULL;
00052         this->ramProportion = 1;
00053         this->diskProportion = 0;
00054         this->ramCount = 0;
00055         this->diskCount = 0;
00056         this->lastRowId = 0;
00057         this->lastRowFilename = "";
00058         this->firstColumnWriter = NULL;
00059         this->score_params = score_params;
00060 
00061     rowsVector.push_back(&firstRow);
00062         //this->path = getPartitionPath(i0, j0, i1, j1);
00063     if (readOnly) {
00064         persistent = true;
00065         struct stat s;
00066         if (stat(path.c_str(), &s) == -1) {
00067             if(errno == ENOENT) {
00068                         fprintf(stderr, "Path (%s) does not exist. (errno: %d)\n", path.c_str(), errno);
00069                         exit(1);
00070             }
00071         }
00072     } else {
00073         persistent = (path.length() != 0);
00074         if (persistent && mkdir(path.c_str(), 0774)) {
00075                 if (errno != EEXIST) {
00076                         fprintf(stderr, "Path (%s) could not be created. Try to create it manually. (errno: %d)\n", path.c_str(), errno);
00077                         exit(1);
00078                 }
00079         }
00080     }
00081 
00082     if (persistent) {
00083         readDirectory();
00084     }
00085 }
00086 
00087 SpecialRowsPartition::~SpecialRowsPartition() {
00088     if (readingRow != NULL) {
00089         readingRow->close();
00090     }
00091         //truncate(i1, j1); // TODO podemos retirar definitivamente?
00092     for (map<int, SpecialRow*>::iterator it = rowsMap.begin(); it != rowsMap.end(); it++) {
00093         SpecialRow* row = (*it).second;
00094         delete row;
00095     }
00096     rowsMap.clear();
00097         for (vector<SpecialRow*>::iterator it = rowsVector.begin(); it != rowsVector.end(); it++) {
00098                 SpecialRow* row = (*it);
00099                 if (row != &firstRow) {
00100                         delete row;
00101                 }
00102         }
00103         rowsVector.clear();
00104 }
00105 
00106 //void SpecialRowsPartition::setFirstRow(const score_params_t* score_params,    bool firstRowGapped) {
00107 //      firstRow.setParams(score_params, firstRowGapped);
00108 //}
00109 
00110 void SpecialRowsPartition::setFirstColumnReader(SeekableCellsReader* reader) {
00111         setBorderReader('C', reader, firstColumnWriter);
00112         this->firstColumnReader = reader;
00113 }
00114 
00115 void SpecialRowsPartition::setFirstRowReader(SeekableCellsReader* reader) {
00116         setBorderReader('R', reader, firstRowWriter);
00117         this->firstRowReader = reader;
00118 }
00119 
00120 
00121 void SpecialRowsPartition::setLastColumnWriter(CellsWriter* writer) {
00122         this->lastColumnWriter = writer;
00123 }
00124 
00125 void SpecialRowsPartition::setLastRowWriter(CellsWriter* writer) {
00126         this->lastRowWriter = writer;
00127 }
00128 
00129 void SpecialRowsPartition::setBorderReader(char prefix, SeekableCellsReader* &reader,
00130                 CellsWriter* &writer) {
00131         if (reader == NULL) return;
00132         if (!persistent) return;
00133 
00134         char str[20];
00135         int startOffset;
00136         if (reader->getType() == INIT_WITH_CUSTOM_DATA) {
00137                 startOffset = 0;
00138         } else {
00139                 InitialCellsReader* initial = (InitialCellsReader*)reader;
00140                 startOffset = initial->getStartOffset();
00141                 //startOffset = -reader->getOffset();
00142         }
00143         sprintf(str, "%c%08X", prefix, startOffset);
00144         stringstream filename;
00145         filename << path << "/" << str << ".";
00146 
00147         switch (reader->getType()) {
00148         case INIT_WITH_CUSTOM_DATA:
00149                 filename << def2str(INIT_WITH_CUSTOM_DATA);
00150                 break;
00151         case INIT_WITH_ZEROES:
00152                 filename << def2str(INIT_WITH_ZEROES);
00153                 break;
00154         case INIT_WITH_GAPS:
00155                 filename << def2str(INIT_WITH_GAPS);
00156                 break;
00157         case INIT_WITH_GAPS_OPENED:
00158                 filename << def2str(INIT_WITH_GAPS_OPENED);
00159                 break;
00160         }
00161 
00162         if (reader->getType() == INIT_WITH_CUSTOM_DATA) {
00163                 //writer = new FileCellsWriter(filename.str());
00164                 reader = new TeeCellsReader(reader, filename.str());
00165                 //TODO
00166         } else {
00167 
00168                 FILE* file = fopen(filename.str().c_str(), "wb");
00169                 if (file == NULL) {
00170                         fprintf(stderr, "Could not create file (%s).\n", filename.str().c_str());
00171                         exit(1);
00172                 }
00173                 fclose(file);
00174         }
00175 }
00176 
00177 void SpecialRowsPartition::reload() {
00178     sort(rowsVector.begin(), rowsVector.end(), SpecialRow::sortById);
00179         readingRowId = rowsVector.size() - 1;
00180         readingRow = NULL;
00181     lastRowId = rowsVector.back()->getId();
00182 
00183         updateLargestInterval();
00184 }
00185 
00186 int SpecialRowsPartition::getLastRowId() {
00187         return i0 + lastRowId;
00188 }
00189 
00190 
00191 SpecialRow* SpecialRowsPartition::getLastRow() {
00192         return rowsVector.back();
00193 }
00194 
00195 string SpecialRowsPartition::getLastRowFilename() {
00196         return lastRowFilename;
00197 }
00198 
00199 int SpecialRowsPartition::getReadingRow() {
00200         return i0 + readingRow->getId();
00201 }
00202 
00203 void SpecialRowsPartition::truncate(int max_i, int max_j) {
00204     if (DEBUG) printf("Flush: %08X,%08X\n", max_i, max_j);
00205         for (vector<SpecialRow*>::iterator it = rowsVector.begin(); it != rowsVector.end(); ) {
00206                 SpecialRow* row = (*it);
00207         if ((row->getId() + i0) >= max_i && row != &firstRow) {
00208                 it = rowsVector.erase(it);
00209                 row->close();
00210                 row->truncateRow(0);
00211                 // precaution: the first row cannot be deleted.
00212                 delete row;
00213         } else {
00214                 row->truncateRow(max_j-j0+1);
00215                 it++;
00216         }
00217         }
00218 
00219     for (map<int, SpecialRow*>::iterator it = rowsMap.begin(); it != rowsMap.end(); it++) {
00220         SpecialRow* row = (*it).second;
00221         row->close();
00222         if ((row->getId() + i0) >= max_i) {
00223                 row->truncateRow(0);
00224                 delete row;
00225         } else {
00226                 row->truncateRow(max_j-j0+1);
00227                 rowsVector.push_back(row);
00228         }
00229     }
00230     rowsMap.clear();
00231     i1 = max_i;
00232     j1 = max_j;
00233     lastRowId = rowsVector.back()->getId();
00234 
00235     updateLargestInterval();
00236 }
00237 
00238 /*void SpecialRowsPartition::translate(int i0, int j0, int i1, int j1) {
00239         rename(getPartitionPath(this->i0, this->j0, this->i1, this->j1).c_str(),
00240                         getPartitionPath(i0, j0, i1, j1).c_str());
00241         this->i0 = i0;
00242         this->i1 = i1;
00243         this->j0 = j0;
00244         this->j1 = j1;
00245 }*/
00246 
00247 int SpecialRowsPartition::getLargestInterval() {
00248         return largestInterval;
00249 }
00250 
00251 int SpecialRowsPartition::getI0() const {
00252         return i0;
00253 }
00254 
00255 int SpecialRowsPartition::getI1() const {
00256         return i1;
00257 }
00258 
00259 int SpecialRowsPartition::getJ0() const {
00260         return j0;
00261 }
00262 
00263 int SpecialRowsPartition::getJ1() const {
00264         return j1;
00265 }
00266 
00267 const string& SpecialRowsPartition::getPath() const {
00268         return path;
00269 }
00270 
00271 void SpecialRowsPartition::changePath(string new_path) {
00272         rename(path.c_str(), new_path.c_str());
00273         path = new_path;
00274 }
00275 
00276 void SpecialRowsPartition::setRamProportion(const long long ram, const long long disk) {
00277         this->ramProportion = ram;
00278         this->diskProportion = disk;
00279 }
00280 
00281 int SpecialRowsPartition::getRowsCount() const {
00282         return rowsVector.size();
00283 }
00284 
00285 string SpecialRowsPartition::getFirstColumnFilename() {
00286         return path + "/" + "C00000000." + def2str(INIT_WITH_CUSTOM_DATA);
00287 }
00288 
00289 string SpecialRowsPartition::getFirstRowFilename() {
00290         return path + "/" + "R00000000." + def2str(INIT_WITH_CUSTOM_DATA);
00291 }
00292 
00293 CellsWriter* SpecialRowsPartition::getFirstColumnWriter() {
00294         if (firstColumnWriter == NULL) {
00295                 firstColumnWriter = new FileCellsWriter(getFirstColumnFilename());
00296         }
00297         return firstColumnWriter;
00298 }
00299 
00300 SeekableCellsReader* SpecialRowsPartition::getFirstColumnReader() {
00301         return firstColumnReader;
00302 }
00303 
00304 SeekableCellsReader* SpecialRowsPartition::getFirstRowReader() {
00305         return firstRowReader;
00306 }
00307 
00308 CellsWriter* SpecialRowsPartition::getLastColumnWriter() {
00309         return lastColumnWriter;
00310 }
00311 
00312 CellsWriter* SpecialRowsPartition::getLastRowWriter() {
00313         return lastRowWriter;
00314 }
00315 
00316 SpecialRow* SpecialRowsPartition::getSpecialRow(int i) {
00317         SpecialRow* row = NULL;
00318         row = rowsMap[i];
00319         if (row == NULL && persistent) {
00320                 // Alternate the creation of rows in disk and in ram.
00321                 if ((diskProportion !=0 && ramProportion==0) || ramCount*diskProportion > ramProportion*diskCount) {
00322                         row = new SpecialRowFile(&path, i);
00323                         diskCount++;
00324                 } else {
00325                         row = new SpecialRowRAM(i);
00326                         ramCount++;
00327                 }
00328 
00329         row->open(readOnly, j1-j0+1);
00330                 rowsMap[i] = row;
00331         }
00332         return row;
00333 }
00334 
00335 int SpecialRowsPartition::write(int i, const cell_t* buf, int len) {
00336         if (readOnly) {
00337         fprintf(stderr, "Fatal: Writing into a read-only SRA Partition");
00338         exit(1);
00339         }
00340         if (!persistent) {
00341                 return 0;
00342         }
00343         SpecialRow* row = getSpecialRow(i-i0);
00344         int ret = row->write(buf, len);
00345         if (row->getOffset() >= (j1-j0)+1) {
00346                 row->close();
00347                 rowsMap.erase(i-i0);
00348                 rowsVector.push_back(row);
00349                 lastRowId = row->getId();
00350         }
00351 
00352         return ret;
00353 }
00354 
00355 void SpecialRowsPartition::updateLargestInterval() {
00356         largestInterval = 0;
00357         for (int i = 1; i < rowsVector.size(); i++) {
00358                 int diff = rowsVector[i]->getId() - rowsVector[i-1]->getId();
00359                 if (largestInterval < diff) {
00360                         largestInterval = diff;
00361                 }
00362         }
00363 }
00364 
00365 void SpecialRowsPartition::readDirectory() {
00366         // This method insert new rows to the vectors.
00367     DIR *dir = NULL;
00368     //printf("Opening Dir: %s\n", path.c_str());
00369     dir = opendir (path.c_str());
00370     struct dirent *dp;          /* returned from readdir() */
00371 
00372     if (dir == NULL) {
00373         fprintf(stderr, "Could not open special rows directory: %s\n", path.c_str());
00374         exit(1);
00375     }
00376 
00377     while ((dp = readdir (dir)) != NULL) {
00378         int id;
00379         if (loadBorderReader('C', string(dp->d_name), firstColumnReader)) {
00380                 continue;
00381         }
00382         if (loadBorderReader('R', string(dp->d_name), firstRowReader)) {
00383                 firstRow.setCellsReader(firstRowReader);
00384                 continue;
00385         }
00386                 SpecialRow* row = new SpecialRowFile(&path, string(dp->d_name));
00387                 if (row->getId() < 0) {
00388                         delete row;
00389                 } else {
00390                         string rowFilename = path + "/" + string(dp->d_name);
00391                         //fprintf(stderr, "Loading... %s: %d\n", rowFilename.c_str(), row->getId());
00392                         rowsVector.push_back(row);
00393                         if (row->getId() > lastRowId) {
00394                                 lastRowId = row->getId();
00395                                 lastRowFilename = rowFilename;
00396                         }
00397                 }
00398 
00399     }
00400     closedir (dir);
00401 
00402     reload();
00403 }
00404 
00405 SpecialRow* SpecialRowsPartition::nextSpecialRow(int i, int j, int min_dist) {
00406         //close();
00407     int count = rowsVector.size();
00408     if (count == 0) {
00409         return NULL;
00410     }
00411 
00412     while (readingRowId >= 0) {
00413         int dist = ((i-i0)-rowsVector[readingRowId]->getId());
00414                 if (DEBUG) printf("l: %d  %d %d (%d, %d)\n", readingRowId, rowsVector[readingRowId]->getId(), dist, i,j);
00415         if (readingRowId == 0) {
00416                 if (dist > 0) {
00417                         // Ensure that the row with id=0 is processed.
00418                         break;
00419                 } else {
00420                 printf("End of Special Lines\n");
00421                 return NULL;
00422                 }
00423         } else {
00424                 if (dist > min_dist) {
00425                         break;
00426                 } else {
00427                         readingRowId--;
00428                 }
00429         }
00430         };
00431     if (readingRow != NULL) {
00432         readingRow->close();
00433     }
00434 
00435         readingRow = rowsVector[readingRowId];
00436         if (DEBUG) printf("id: %d %s/%08X\n", readingRowId, path.c_str(), readingRow->getId());
00437 
00438         if (readingRow == &firstRow) {
00439                 firstRow.setCellsReader(firstRowReader);
00440         }
00441 
00442         readingRow->open(true);
00443         int readingRowOffset = abs(j-j0)+1;
00444         readingRow->seek(readingRowOffset);
00445 
00446     return readingRow;
00447 }
00448 
00449 int SpecialRowsPartition::read(cell_t* buf, int len) {
00450         int ret = readingRow->read(buf, len);
00451         return ret;
00452 }
00453 
00454 int SpecialRowsPartition::continueFromLastRow() {
00455         firstColumnReader->read(NULL, lastRowId);
00456         // TODO o first row source precisa considerar a linha onde ele se encontra (i.e. primeira celula possui valor != 0)
00457         // TODO Lembrando que eu retirei o primeiro dispatch cell.
00458         setFirstRowReader(new FileCellsReader(lastRowFilename.c_str()));
00459         printf("Continuing partition from row %d (%s)\n",
00460                         lastRowId + i0, lastRowFilename.c_str());
00461         return lastRowId + i0;
00462 }
00463 
00464 bool SpecialRowsPartition::isPersistent() const {
00465         return persistent;
00466 }
00467 
00468 void SpecialRowsPartition::createChain(SpecialRowsPartition* that) {
00469         if (that->getI1() == this->getI0()) {
00470                 FileStream* stream = new FileStream(this->getFirstRowFilename());
00471                 that->lastRowWriter = stream;
00472                 this->firstRowReader = stream;
00473         } else if (that->getJ1() == this->getJ0()) {
00474                 FileStream* stream = new FileStream(this->getFirstColumnFilename());
00475                 that->lastColumnWriter = stream;
00476                 this->firstColumnReader = stream;
00477         } else if (this->getI1() == that->getI0()) {
00478                 FileStream* stream = new FileStream(that->getFirstRowFilename());
00479                 this->lastRowWriter = stream;
00480                 that->firstRowReader = stream;
00481         } else if (this->getJ1() == that->getJ0()) {
00482                 FileStream* stream = new FileStream(that->getFirstColumnFilename());
00483                 this->lastColumnWriter = stream;
00484                 that->firstColumnReader = stream;
00485         }
00486 
00487 }
00488 
00489 void SpecialRowsPartition::deleteRows() {
00490         truncate(-1, -1);
00491 }
00492 
00493 bool SpecialRowsPartition::loadBorderReader(char prefix, string file, SeekableCellsReader* &reader) {
00494     if (file[0] == prefix) {
00495         string type = file.substr(10);
00496         int offset;
00497         sscanf(file.c_str(), "%*c%08X", &offset);
00498         if (type == def2str(INIT_WITH_CUSTOM_DATA)) {
00499                 reader = new FileCellsReader(path + "/" + file);
00500         } else if (type == def2str(INIT_WITH_ZEROES)) {
00501                 reader = new InitialCellsReader(offset);
00502         } else if (type == def2str(INIT_WITH_GAPS)) {
00503                 // TODO is this correct? FIXME gapOpen/gapFirst ?
00504                 reader = new InitialCellsReader(score_params->gap_open + offset*score_params->gap_ext, score_params->gap_ext);
00505         } else if (type == def2str(INIT_WITH_GAPS_OPENED)) {
00506                 // TODO is this correct? FIXME gapOpen/gapFirst ?
00507                 reader = new InitialCellsReader(offset*score_params->gap_ext, score_params->gap_ext);
00508         } else {
00509                 return false;
00510         }
00511         return true;
00512     }
00513     return false;
00514 }
00515