MASA-Core
SpecialRowReader.cpp
Go to the documentation of this file.
00001 /*******************************************************************************
00002  *
00003  * Copyright (c) 2010-2015   Edans Sandes
00004  *
00005  * This file is part of MASA-Core.
00006  * 
00007  * MASA-Core is free software: you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation, either version 3 of the License, or
00010  * (at your option) any later version.
00011  * 
00012  * MASA-Core is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with MASA-Core.  If not, see <http://www.gnu.org/licenses/>.
00019  *
00020  ******************************************************************************/
00021 
00022 #include "SpecialRowReader.hpp"
00023 
00024 #include <string.h>
00025 
00026 #include <algorithm>
00027 using namespace std;
00028 
00029 #include <dirent.h>
00030 #include <stdlib.h>
00031 #include "../libmasa/IAligner.hpp"
00032 
00033 #define DEBUG (0)
00034 
00035 
00036 SpecialRowReader::SpecialRowReader(string directory, const score_params_t* score_params, bool firstRowGapped) {
00037     this->firstRowGapped = firstRowGapped;
00038     this->directory = directory;
00039     this->score_params = score_params;
00040     loadSpecialRows();
00041     id = 0;
00042     file = NULL;
00043 
00044 }
00045 
00046 SpecialRowReader::~SpecialRowReader() {
00047     close();
00048 }
00049 
00050 int SpecialRowReader::getCol() const {
00051     return specialRowCoordinates[id].col;
00052 }
00053 
00054 int SpecialRowReader::getRow() const {
00055     return specialRowCoordinates[id].row;
00056 }
00057 
00058 string SpecialRowReader::getFilename(int row, int col) {
00059         char str[500];
00060         if (row == -1) {
00061                 row = getRow();
00062         }
00063         if (col == -1) {
00064                 col = getCol();
00065         }
00066         sprintf(str, "%s/%08X.%08X", directory.c_str(), row, col);
00067         return string(str);
00068 }
00069 
00070 void SpecialRowReader::open(int start) {
00071         close();
00072     this->start = start;
00073 
00074     string filename = getFilename();
00075     if (DEBUG) printf("SpecialRow: %s (%d)\n", filename.c_str(), start);
00076     this->file = fopen(filename.c_str(), "rb");
00077     printf("%s\n", filename.c_str());
00078     if (getRow() == 0 && getCol() == 0) {
00079         printf("First Special Row (0,0)\n");
00080     } else if (file == NULL) {
00081         fprintf(stderr, "Could not open special row file: %s\n", filename.c_str());
00082         exit(1);
00083     }
00084 }
00085 
00086 void SpecialRowReader::close() {
00087     if (file != NULL) {
00088         printf("File Close: %p\n", file);
00089         fclose(file);
00090         file = NULL;
00091     }
00092 }
00093 
00094 int SpecialRowReader::getCurrentPosition() {
00095     return current;
00096 }
00097 
00098 int SpecialRowReader::read(cell_t* buf, int offset, int len) {
00099         int revOffset = start-offset-(len-1);
00100         if (revOffset < 0) {
00101                 revOffset = 0;
00102                 len = start-offset+1;
00103                 if (len < 0) len = 0;
00104         }
00105         if (file == NULL) {
00106                 if (score_params == NULL) {
00107                         fprintf(stderr, "No score parameters supplied for the first row.\n");
00108                         exit(1);
00109                 }
00110                 for (int k=0; k<len; k++) {
00111                         int ir = revOffset+(len-1-k);
00112                         if (firstRowGapped) {
00113                                 buf[k].h = (ir==0) ? 0 : -ir*score_params->gap_ext-score_params->gap_open;
00114                         } else {
00115                                 buf[k].h = 0;
00116                         }
00117                         buf[k].f = -INF;
00118                 }
00119         } else {
00120                 int r=0;
00121                 fseek(file, revOffset*sizeof(cell_t), SEEK_SET);
00122                 if (DEBUG) printf("READ [%d..%d] (%d) POS_SEEK: %ld\n", revOffset, revOffset+len-1,  len, ftell(file));
00123                 while (r<len) {
00124                         int ret = fread(buf, sizeof(cell_t), len-r, file);
00125                         if (ret == 0) {
00126                                 fprintf(stderr, "Error: End of special row (%d).\n", len-r);
00127                                 exit(1);
00128                         }
00129                         r += ret;
00130                 }
00131                 for (int i=0; i<len/2; i++) {
00132                         cell_t aux = buf[i];
00133                         buf[i] = buf[len-1-i];
00134                         buf[len-1-i] = aux;
00135                 }
00136         }
00137 
00138     return len;
00139 }
00140 
00141 int coordinate_t::sortf(coordinate_t a, coordinate_t b) {
00142     if (a.row != b.row) {
00143         return a.row > b.row;
00144     } else {
00145         return a.col > b.col;
00146     }
00147 }
00148 
00149 void SpecialRowReader::loadSpecialRows() {
00150     DIR *dir = NULL;
00151 
00152     dir = opendir (directory.c_str());
00153     struct dirent *dp;          /* returned from readdir() */
00154 
00155     if (dir == NULL) {
00156         fprintf(stderr, "Could not open special rows directory: %s\n", directory.c_str());
00157         exit(1);
00158     }
00159 
00160     while ((dp = readdir (dir)) != NULL) {
00161         coordinate_t coords;
00162         int col;
00163         if (sscanf(dp->d_name, "%X.%X", &coords.row, &coords.col) == 2) {
00164             specialRowCoordinates.push_back(coords);
00165         }
00166     }
00167     coordinate_t coords = {0,0};
00168     specialRowCoordinates.push_back(coords);
00169 
00170     closedir (dir);
00171     sort(specialRowCoordinates.begin(), specialRowCoordinates.end(), coordinate_t::sortf);
00172 
00173     if (DEBUG) {
00174                 vector<coordinate_t>::iterator it;
00175                 for (it=specialRowCoordinates.begin() ; it < specialRowCoordinates.end(); it++) {
00176                         printf ("(%08X.%08X)\n", (*it).row, (*it).col);
00177                 }
00178     }
00179 }
00180 
00181 
00182 bool SpecialRowReader::nextSpecialRow(int row, int col, int min_dist) {
00183         close();
00184 
00185     int count = specialRowCoordinates.size();
00186 
00187         while (id<count && specialRowCoordinates[id].row > row - min_dist) {
00188                 if (DEBUG) printf("l: %d  (%d,%d) - (%d,%d)\n", id,
00189                                 specialRowCoordinates[id].row, specialRowCoordinates[id].col,
00190                                 row, col);
00191                 id++;
00192         }
00193 
00194     printf("id: %d\n", id);
00195     if (id >= count) {
00196         id = count-1; // coordinate {0,0}
00197         printf("End of Special Lines\n");
00198         return false;
00199     }
00200 
00201     printf("-[%d.%d]\n", row, col);
00202     row = specialRowCoordinates[id].row;
00203     col = specialRowCoordinates[id].col;
00204 
00205     printf("+[%d.%d]\n", row, col);
00206 
00207     return true;
00208 }
00209 
00210 int SpecialRowReader::getLargestInterval(int* max_i, int* max_j) {
00211 
00212         int max_size_i = 0;
00213         int max_size_j = 0;
00214         for (int i=1; i < specialRowCoordinates.size(); i++) {
00215                 coordinate_t curr = specialRowCoordinates[i];
00216                 coordinate_t prev = specialRowCoordinates[i-1];
00217                 int delta_i = prev.row - curr.row;
00218                 int delta_j = prev.col - curr.col;
00219                 if (delta_i !=0 && delta_j != 0) {
00220                         // TODO conferir. Principalmente se estiver no stage 3!! pois teriamos que contar o crosspoint.
00221                         // TODO talvez devessemos salvar um special row dummy pra evitar isso.
00222                         continue;
00223                 }
00224                 if (max_size_i < delta_i) max_size_i = delta_i;
00225                 if (max_size_j < delta_j) max_size_j = delta_j;
00226         }
00227         if (max_i != NULL) *max_i = max_size_i;
00228         if (max_j != NULL) *max_j = max_size_j;
00229         if (max_size_i > max_size_j) {
00230                 return max_size_i;
00231         } else {
00232                 return max_size_j;
00233         }
00234 }