|
MASA-Core
|
00001 /******************************************************************************* 00002 * 00003 * Copyright (c) 2010-2015 Edans Sandes 00004 * 00005 * This file is part of MASA-Core. 00006 * 00007 * MASA-Core is free software: you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation, either version 3 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * MASA-Core is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with MASA-Core. If not, see <http://www.gnu.org/licenses/>. 00019 * 00020 ******************************************************************************/ 00021 00022 #include "libmasa.hpp" 00023 00024 #include <stdio.h> 00025 #include <stdlib.h> 00026 #include <getopt.h> 00027 #include <string.h> 00028 #include <errno.h> 00029 #include <sys/wait.h> 00030 #include <unistd.h> 00031 00032 #include "../common/Common.hpp" 00033 #include "../stage1/sw_stage1.h" 00034 #include "../stage2/sw_stage2.h" 00035 #include "../stage3/sw_stage3.h" 00036 #include "../stage4/sw_stage4.h" 00037 #include "../stage5/sw_stage5.h" 00038 #include "../stage6/sw_stage6.h" 00039 //#include "testing/AlignerTester.hpp" 00040 #include "../masanet/MasaNet.hpp" 00041 //#include "../masanet/MasaNetCLI.hpp" 00042 #include "../config.h" 00043 00044 #include <sstream> 00045 using namespace std; 00046 00047 00048 /** 00049 * Default amount of disk/ram space used for flushing special lines. 00050 */ 00051 //#define DEFAULT_RAM_LIMIT (1*1024*1024*1024LL) // 1G 00052 #define DEFAULT_RAM_LIMIT (0) // none 00053 #define DEFAULT_FLUSH_RAM_STRING "0" // SHOW USAGE 00054 #define DEFAULT_DISK_LIMIT (0) // none 00055 #define DEFAULT_FLUSH_DISK_STRING "0" // SHOW USAGE 00056 00057 /** 00058 * Fork the maximum number of processes supported by the Aligner. 00059 */ 00060 #define MAX_POSSIBLE_FORK (0) 00061 00062 /** 00063 * Constant used to represent that no special lines will be flushed. 00064 */ 00065 #define NO_FLUSH (-1) 00066 00067 /** 00068 * Constant used to execute all stages. 00069 */ 00070 #define ALL_STAGES (0) 00071 00072 /** 00073 * The default working directory for temprary data. 00074 */ 00075 #define DEFAULT_WORK_DIRECTORY "./work.tmp" 00076 00077 /** 00078 * 00079 */ 00080 #define DEFAULT_PHASE_3_SIZE 16 00081 #define DEFAULT_MPS_STRING "16" // SHOW USAGE 00082 00083 /** 00084 * 00085 */ 00086 #define DEFAULT_MAX_ALIGNMENTS 1 00087 #define DEFAULT_MAX_ALIGNMENTS_STRING "1" 00088 00089 00090 /** 00091 * 00092 */ 00093 #define DEFAULT_BUFFER_LIMIT (1024*1024) 00094 00095 00096 00097 /** 00098 * Only pairwise sequence alignment is supported 00099 */ 00100 #define SEQUENCES_COUNT (2) 00101 00102 /** 00103 * Optarg: command line parameters. 00104 * Characters are short options. Hexadecimals are long options. 00105 */ 00106 // General Options 00107 #define ARG_HELP 'h' 00108 #define ARG_WORK_DIR 'd' 00109 #define ARG_CLEAR 'c' 00110 #define ARG_VERBOSE 'v' 00111 /*#define ARG_GPU 'g' 00112 #define ARG_LIST_GPUS 0x8001 00113 #define ARG_MULTIPLE_GPUS 0x8002 00114 #define ARG_BLOCKS 'b'*/ 00115 #define ARG_SPECIAL_ROWS_DIR 0x8003 00116 #define ARG_SHARED_DIR 0x8004 00117 #define ARG_WAIT_PART 0x8005 00118 #define ARG_FORK 0x8006 00119 00120 // Input Options 00121 #define ARG_TRIM 't' 00122 #define ARG_SPLIT 's' 00123 #define ARG_PART 0x9005 00124 #define ARG_CLEAR_N 0x9006 00125 #define ARG_REVERSE 0x9007 00126 #define ARG_COMPLEMENT 0x9008 00127 #define ARG_REVERSE_COMPLEMENT 0x9009 00128 00129 // Alignment Options 00130 #define ARG_ALIGNMENT_START 0x9101 00131 #define ARG_ALIGNMENT_END 0x9102 00132 #define ARG_ALIGNMENT_EDGES 0x9103 00133 00134 00135 // Execution Options 00136 #define ARG_STAGE_1 '1' 00137 #define ARG_NO_FLUSH 'n' 00138 #define ARG_NO_BLOCK_PRUNING 'p' 00139 #define ARG_DUMP_BLOCKS 0x1007 00140 #define ARG_DISK_SIZE 0x1008 00141 #define ARG_RAM_SIZE 0x1009 00142 #define ARG_FLUSH_COLUMN 0x1010 00143 #define ARG_LOAD_COLUMN 0x1011 00144 #define ARG_ALIGNMENT_ID 0x1012 00145 #define ARG_MAX_ALIGNMENTS 0x1013 00146 00147 #define ARG_MASANET 0x1014 00148 #define ARG_MASANET_CONNECT 0x1015 00149 00150 #define ARG_STAGE_2 '2' 00151 #define ARG_PREDICTED_TRACEBACK 0x2011 00152 00153 #define ARG_STAGE_3 '3' 00154 #define ARG_MAXIMUM_PARTITION 0x3011 00155 #define ARG_NOT_ORTHOGONAL 0x3012 00156 #define ARG_STAGE_4 '4' 00157 #define ARG_STAGE_5 '5' 00158 #define ARG_STAGE_6 '6' 00159 #define ARG_OUTPUT_FORMAT 0x6013 00160 #define ARG_LIST_FORMATS 0x6014 00161 00162 // Tools Options 00163 #define ARG_DRAW_PRUNING 0x7015 00164 #define ARG_TEST 0x7016 00165 00166 00167 #define TOOL_DRAW_PRUNING (1) 00168 00169 00170 /** 00171 * Header 00172 */ 00173 #define MASA_HEADER "\ 00174 Linked with MASA - Malleable Architecture for Sequence Aligners - "PACKAGE_VERSION"\n\ 00175 University of Brasilia/UnB - Brazil \n\ 00176 Copyright (c) 2010-2015 Edans Sandes - License GPLv3\n\ 00177 This program comes with ABSOLUTELY NO WARRANTY.\n\ 00178 \n" 00179 00180 00181 00182 /** 00183 * Usage string to be shown in help. 00184 */ 00185 #define USAGE "\ 00186 Usage: %s [OPTIONS] [FASTA FILE #1] [FASTA FILE #2] \n\ 00187 \n\ 00188 FASTA FILES: Supply two sequences in fasta format files. \n\ 00189 \n\ 00190 \n\ 00191 \033[1mGeneral Options:\033[0m\n\ 00192 \n\ 00193 -h, --help Shows this help.\n\ 00194 -d, --work-dir=DIR Directory used to store files produced by the stages.\n\ 00195 Default: "DEFAULT_WORK_DIRECTORY" \n\ 00196 --special-rows-dir=DIR Directory used to store the special rows produced by\n\ 00197 the gpu stages. The default is to use a subfolder of\n\ 00198 the work directory (see --work-dir parameter).\n\ 00199 --shared-dir=DIR Directory used to share data between forked instances.\n\ 00200 --wait-part=PART Process will wait until the conclusion of --part=PART.\n\ 00201 -c, --clear Clears the work directory before any computation. This \n\ 00202 prevents the continuation of previously interrupted \n\ 00203 execution.\n\ 00204 -v, --verbose=LEVEL Shows informative output during computation. \n\ 00205 0: Silently;\n\ 00206 1: Only shows error messages;\n\ 00207 2: (Default) Shows progress and statistics; \n\ 00208 3: Gives full output data.\n\ 00209 --fork Fork many processes in order to optimize performance. \n\ 00210 --fork=COUNT Fork with a limited number of processes.\n\ 00211 --fork=W1,W2,...,Wn Fork with the given weight proportions.\n\ 00212 \n\ 00213 \n\ 00214 \033[1mInput Options:\033[0m\n\ 00215 \n\ 00216 -t, --trim=I0,I1,J0,J1 Trims sequence #1 from position I0 to I1 (inclusive). \n\ 00217 and sequence #2 from position J0 to J1 (inclusive). \n\ 00218 Zero represents either first and last positions. \n\ 00219 This parameter is ignored if used together with the \n\ 00220 --split parameter. \n\ 00221 --split=COUNT Splits sequence #2 in COUNT equal segments. This \n\ 00222 parameter must be used together with the \n\ 00223 --part parameter. \n\ 00224 --split=W1,W2,...,Wn Splits sequence #2 in n segments with weighted \n\ 00225 proportions. This parameter must be used \n\ 00226 together with the --part parameter. \n\ 00227 --part=PART When the --split parameter is used, the sequence #2 is \n\ 00228 divided in many parts. The --part parameter selects \n\ 00229 which part will be executed by this process. \n\ 00230 If the --load-columns and --flush-columns parameters\n\ 00231 are not set, then the last column will be saved into\n\ 00232 a file in the current directory. \n\ 00233 --clear-n Remove all 'N' characters on both fasta files.\n\ 00234 --reverse=[1|2|both] Reverse strands of sequence 1, 2 or both. \n\ 00235 --complement=[1|2|both] Generate complement (AT,CG) for sequence 1, 2 or both. \n\ 00236 --reverse-complement=[1|2|both] \n\ 00237 Generate reverse-complement (opposite strand) for \n\ 00238 sequence 1, 2 or both. This parameter joins the \n\ 00239 --reverse and --complement parameters. \n\ 00240 \n\ 00241 \033[1mAlignment Type:\033[0m\n\ 00242 \n\ 00243 --alignment-start=[*|1|2|3|+] \n\ 00244 --alignment-end= [*|1|2|3|+] \n\ 00245 --alignment-edges=[*|1|2|3|+][*|1|2|3|+] (start,end)\n\ 00246 Defines where the alignment can start or end. \n\ 00247 - *: any location.\n\ 00248 - 1: start/end of sequence 1. \n\ 00249 - 2: start/end of sequence 2. \n\ 00250 - 3: start/end of sequences 1 or 2. \n\ 00251 - +: start/end of sequences 1 and 2. \n\ 00252 \n\ 00253 \033[1mStage Options:\033[0m\n\ 00254 \n\ 00255 \033[1mStage #1 Options:\033[0m\n\ 00256 -1, --stage-1 Executes only the stage #1 of algorithm, i.e., returns \n\ 00257 the best score and its coordinates. Special rows \n\ 00258 are stored in disk to allow the execution of the \n\ 00259 subsequent stages.\n\ 00260 -n, --no-flush Do not save special rows. Using this option \n\ 00261 in stage #1 will prevent the execution of subsequent\n\ 00262 phases.\n\ 00263 -p, --no-block-pruning Does not use the block pruning optimization \n\ 00264 \n\ 00265 --disk-size=SIZE Limits the disk/ram size available to the special rows.\n\ 00266 --ram-size=SIZE The SIZE parameter may contain suffix M (e.g., 500M)\n\ 00267 or G (e.g., 10G). This option is ignored if used\n\ 00268 together with the --no-flush parameter. \n\ 00269 Default values: "DEFAULT_FLUSH_RAM_STRING"/"DEFAULT_FLUSH_DISK_STRING".\n\ 00270 --flush-column=URL Store the last column cells in some destination. The \n\ 00271 URL is given in some of these formats: \n\ 00272 file://PATH_TO_FILE \n\ 00273 socket://0.0.0.0:LISTENING_PORT \n\ 00274 --load-column=URL Loads the first column cells from some destination. The\n\ 00275 URL is given in some of these formats: \n\ 00276 file://PATH_TO_FILE \n\ 00277 socket://HOSTNAME:PORT \n\ 00278 --dump-blocks Saves the result of each block in the alignment file. \n\ 00279 --max-alignments Maximum number of alignments to return. Default:"DEFAULT_MAX_ALIGNMENTS_STRING".\n\ 00280 \n\ 00281 \033[1mStage #2 Options:\033[0m\n\ 00282 -2, --stage-2 Executes only the stage #2 of algorithm, i.e., returns \n\ 00283 a list of crosspoints inside the optimal alignment. \n\ 00284 Special columns are stored in disk to allow the \n\ 00285 execution of the subsequent stages. The disk size \n\ 00286 available to store the special columns may be \n\ 00287 configured using the --disk-space parameter. \n\ 00288 \n\ 00289 \033[1mStage #3 Options:\033[0m\n\ 00290 -3, --stage-3 Executes only the stage #3 of algorithm, i.e., returns \n\ 00291 a bigger list of crosspoints inside the optimal \n\ 00292 alignment.\n\ 00293 \n\ 00294 \033[1mStage #4 Options:\033[0m\n\ 00295 -4, --stage-4 Executes only the stage #4 of algorithm, i.e., given a \n\ 00296 list of coordinates of the optimal alignment, \n\ 00297 increases the number of crosspoint using \n\ 00298 Myers and Miller's algorithm, until all the \n\ 00299 partitions are smaller than the maximum partition \n\ 00300 size.\n\ 00301 --maximum-partition=SIZE \n\ 00302 Defines the maximum partition size allowed as output \n\ 00303 of the stage #4. This parameter limits the size of \n\ 00304 partitions processed in stage #5. \n\ 00305 Default Value: "DEFAULT_MPS_STRING" \n\ 00306 --not-orthogonal Does not use the orthogonal execution otimization. \n\ 00307 \n\ 00308 \033[1mStage #5 Options:\033[0m\n\ 00309 -5, --stage-5 Executes only the stage #5 of algorithm, i.e., given \n\ 00310 a list of coordinates of the optimal alignment, \n\ 00311 returns the full alignment (as binary output). \n\ 00312 \n\ 00313 \033[1mStage #6 Options:\033[0m\n\ 00314 -6, --stage-6 Executes only the stage #6 of algorithm, i.e., given \n\ 00315 an alignment in binary format, returns the full \n\ 00316 alignment in the format defined in with the \n\ 00317 --output-format argument.\n\ 00318 --output-format=FORMAT Selects the output format of the full alignment \n\ 00319 in stage #6. Possibile formats may be listed with \n\ 00320 the --list-formats parameter. \n\ 00321 --list-formats Lists all the possible output formats for stage #6. \n\ 00322 \n\ 00323 \n\ 00324 " 00325 00326 00327 /** 00328 * Shows the usage of the command line tool. 00329 */ 00330 static void show_usage(char* program_name, Job* job) { 00331 printf ( USAGE, program_name ); 00332 IAlignerParameters* params = job->aligner->getParameters(); 00333 params->printUsage(); 00334 } 00335 00336 static void print_header(char* aligner_header, FILE* file = NULL) { 00337 if (aligner_header != NULL) { 00338 if (file == NULL) { 00339 fprintf (stdout, "\n\033[1m%s\033[0m\n", aligner_header); 00340 } else { 00341 fprintf (file, "\n%s\n", aligner_header); 00342 } 00343 } 00344 fprintf (file == NULL ? stdout : file, MASA_HEADER ); 00345 } 00346 00347 static int parse_sequence_flags ( char* optarg, bool* flag ) { 00348 if ( strcasecmp ( optarg, "none" ) ==0 ) { 00349 flag[0] = false; 00350 flag[1] = false; 00351 } else if ( strcasecmp ( optarg, "1" ) ==0 ) { 00352 flag[0] = true; 00353 flag[1] = false; 00354 } else if ( strcasecmp ( optarg, "2" ) ==0 ) { 00355 flag[0] = false; 00356 flag[1] = true; 00357 } else if ( strcasecmp ( optarg, "both" ) ==0 ) { 00358 flag[0] = true; 00359 flag[1] = true; 00360 } else { 00361 return 0; 00362 } 00363 return 1; 00364 } 00365 00366 static int parse_alignment_flags ( char c, int* flag ) { 00367 if ( c == '*' ) { 00368 *flag = AT_ANYWHERE; 00369 } else if ( c == '1' ) { 00370 *flag = AT_SEQUENCE_1; 00371 } else if ( c == '2' ) { 00372 *flag = AT_SEQUENCE_2; 00373 } else if ( c == '3' ) { 00374 *flag = AT_SEQUENCE_1_OR_2; 00375 } else if ( c == '+' ) { 00376 *flag = AT_SEQUENCE_1_AND_2; 00377 } else { 00378 return 0; 00379 } 00380 return 1; 00381 } 00382 00383 static int parse_proportions ( char* optarg, int** proportions, char* param_name, char* current_arg ) { 00384 const int MAX_PROPORTIONS = 32; 00385 int count = 0; 00386 for (char* c=optarg; *c; c++) { 00387 if (*c == ',') continue; 00388 if (*c < '0' || *c > '9') { 00389 throw IllegalArgumentException("Only comma separated numbers are allowed (e.g. COUNT or W1,W2,...,Wn).", current_arg); 00390 } 00391 } 00392 if (strchr(optarg, ',') != NULL) { 00393 char tmparg[strlen(optarg)+1]; 00394 strcpy(tmparg, optarg); // avoid parameter destruction 00395 char* tok = strtok(tmparg, ","); 00396 int* weights = new int[MAX_PROPORTIONS]; 00397 while (tok != NULL && count < MAX_PROPORTIONS) { 00398 sscanf(tok, "%d", &weights[count]); 00399 if (weights[count] <= 0) { 00400 throw IllegalArgumentException("Proportion weights must be positive integers.", current_arg); 00401 } 00402 count++; 00403 tok = strtok(NULL, ","); 00404 } 00405 if (count >= MAX_PROPORTIONS) { 00406 fprintf(stderr, "Error: Maximum proportion weights is %d.\n", MAX_PROPORTIONS-1); 00407 throw IllegalArgumentException("Too many proportion weights.", current_arg); 00408 } 00409 weights[count] = 0; 00410 *proportions = weights; 00411 } else { 00412 sscanf( optarg, "%d", &count); 00413 if (count <= 0) { 00414 throw IllegalArgumentException("Argument must be a positive integer.", current_arg); 00415 } 00416 *proportions = NULL; 00417 } 00418 return count; 00419 00420 } 00421 00422 static long long parse_size(char* optarg, char* current_arg) { 00423 long long size; 00424 string str; 00425 str = optarg; 00426 if (str == "0") { 00427 return 0; 00428 } 00429 char suffix = str[str.length()-1]; 00430 str[str.length()-1] = 0; 00431 switch ( suffix ) { 00432 case 'K': 00433 size = ( long long ) ( atof ( str.c_str() ) *1024LL ); 00434 break; 00435 case 'M': 00436 size = ( long long ) ( atof ( str.c_str() ) *1024*1024LL ); 00437 break; 00438 case 'G': 00439 size = ( long long ) ( atof ( str.c_str() ) *1024*1024*1024LL ); 00440 break; 00441 default: 00442 throw IllegalArgumentException("Wrong size suffix (use 'K', 'M' or 'G').", current_arg); 00443 } 00444 /*if ( size == 0 ) { 00445 throw IllegalArgumentException("Wrong size limit.", current_arg); 00446 }*/ 00447 return size; 00448 } 00449 00450 /** 00451 * Show possible output formats used in stage #6-> 00452 */ 00453 static void print_output_formats ( FILE* file=stdout ) { 00454 fprintf ( file, "Output Formats\n" ); 00455 fprintf ( file, "%10s: %s\n", "NAME", "DESCRIPTION" ); 00456 fprintf ( file, "---------------------------\n" ); 00457 for ( output_format_t* format = stage6_formats; format->name; format++ ) { 00458 fprintf ( file, "%10s: %s\n", format->name, format->description ); 00459 } 00460 } 00461 00462 /** 00463 * Load sequence with proper flags. 00464 */ 00465 /*static int getFlags (int clear_n, int reverse, int complement) { 00466 00467 // Select Flags 00468 int flags = 0; 00469 if ( clear_n ) flags = ( flags | FLAG_CLEAR_N ); 00470 if ( reverse ) flags = ( flags | FLAG_REVERSE ); 00471 if ( complement ) flags = ( flags | FLAG_COMPLEMENT ); 00472 00473 return flags; 00474 }*/ 00475 00476 static void split_sequences ( Job* _job, int split_step, int split_count, int* weights, int wait_step ) { 00477 long long int proportions[split_count+1]; 00478 00479 proportions[0] = 0; 00480 for ( int i=0; i<split_count; i++ ) { 00481 proportions[i+1] = proportions[i] + weights[i]; 00482 } 00483 long long int sum = proportions[split_count]; 00484 for ( int i=0; i<split_count; i++ ) { 00485 printf ( "split[%d]: %.2f%%\n", i, 00486 ( proportions[i+1]-proportions[i] ) *100.00/sum ); 00487 } 00488 00489 /* Process split positions. The 'trim_xx' arguments are overwrited. */ 00490 int seq1_len = _job->getAlignmentParams()->getSequence(1)->getLen(); 00491 int trim_j0 = ( int ) ( ( (( long long int ) seq1_len)*proportions[split_step-1] ) /sum + 1 ); 00492 int trim_j1 = ( int ) ( ( (( long long int ) seq1_len)*proportions[split_step] ) /sum ); 00493 //int trim_j0 = ( int ) ( ( ( long long int ) seq1_len* ( split_step-1 ) ) /split_count + 1 ); 00494 //int trim_j1 = ( int ) ( ( ( long long int ) seq1_len*split_step ) /split_count ); 00495 if ( split_step > 1 && _job->load_column_url == "" ) { 00496 char str[128]; 00497 sprintf ( str, "file://STEP-%d-%d-%d.tmp", 00498 split_step-1, split_count, trim_j0-1 ); 00499 _job->load_column_url = str; 00500 } 00501 if ( split_step < split_count && _job->flush_column_url == "" ) { 00502 char str[128]; 00503 sprintf ( str, "file://STEP-%d-%d-%d.tmp", 00504 split_step, split_count, trim_j1 ); 00505 _job->flush_column_url = str; 00506 } 00507 _job->getAlignmentParams()->getSequence(1)->trim(trim_j0, trim_j1); 00508 00509 if (wait_step >= 0) { 00510 int wait_id = ( int ) ( ( (( long long int ) seq1_len)*proportions[wait_step] ) /sum ); 00511 _job->setPoolWaitId(wait_id); 00512 } 00513 00514 } 00515 00516 /** 00517 * Fork process for mutigpu execution. 00518 */ 00519 static int fork_multi_process ( int count, Job* _job, const int* weights, int split_step) { 00520 IAligner* aligner = _job->aligner; 00521 IAlignerParameters* param = _job->aligner->getParameters(); 00522 00523 if (weights == NULL) { 00524 fprintf (stderr, "No forked instances allowed.\n"); 00525 exit(1); 00526 } 00527 00528 int firstId=-1; 00529 int lastId=-1; 00530 long long int proportions[count+1]; 00531 int previous_id[count+1]; 00532 00533 proportions[0] = 0; 00534 for ( int i=0; i<count; i++ ) { 00535 proportions[i+1] = proportions[i] + weights[i]; 00536 if (weights[i] > 0) { 00537 if (firstId == -1) { 00538 firstId = i; 00539 } 00540 previous_id[i] = lastId; 00541 lastId = i; 00542 } 00543 } 00544 long long int sum = proportions[count]; 00545 for ( int i=0; i<count; i++ ) { 00546 printf ( "fork[%d%c]: %.2f%%\n", i, 00547 (i >= firstId && i <= lastId)?'+':' ', 00548 ( proportions[i+1]-proportions[i] ) *100.00/sum ); 00549 } 00550 if (firstId == -1) { 00551 fprintf (stderr, "No forked instances with valid weight.\n"); 00552 exit(1); 00553 } 00554 00555 00556 //int child_pid[gpus]; 00557 bool parent; 00558 for ( int i=0; i<count; i++ ) { 00559 if (weights[i] <= 0) continue; 00560 int pid = fork(); 00561 if ( pid == 0 ) { 00562 parent = false; 00563 param->setForkId(i); 00564 if ( i > firstId ) { 00565 char str[128]; 00566 sprintf ( str, "socket://127.0.0.1:%d", 00567 7000 + previous_id[i] + split_step*count ); 00568 _job->load_column_url = str; 00569 } 00570 if ( i < lastId ) { 00571 char str[128]; 00572 sprintf ( str, "socket://127.0.0.1:%d", 00573 7000 + i + split_step*count); 00574 _job->flush_column_url = str; 00575 } 00576 //_job->gpu = i; 00577 break; 00578 } 00579 //sleep(5); // FIXME 00580 fprintf(stderr, "+PID: %d\n", pid); 00581 //child_pid[i] = pid; 00582 parent = true; 00583 } 00584 if ( parent ) { 00585 int pid; 00586 int successful = 1; 00587 do { 00588 int status; 00589 pid = wait(&status); 00590 if ( pid == -1 && errno != ECHILD ) { 00591 perror ( "Error during wait()\n" ); 00592 abort(); 00593 } 00594 fprintf(stderr, "-PID(%d): %s (%d) %s %d\n", pid, 00595 WIFEXITED(status)?"child return code":"abnormal error code", 00596 WIFEXITED(status)?(char)WEXITSTATUS(status):status, 00597 WIFSIGNALED(status)?"Signalized: ":"-", 00598 WIFSIGNALED(status)?WTERMSIG(status):0); 00599 if (!WIFEXITED(status)) { 00600 successful = false; 00601 } 00602 } while ( pid > 0 ); 00603 if (successful) { 00604 fprintf(stderr, "Processes terminated normally\n"); 00605 } else { 00606 fprintf(stderr, "Some process aborted the execution.\n"); 00607 } 00608 exit(0); 00609 } 00610 00611 int seq1_len = _job->getAlignmentParams()->getSequence(1)->getLen(); 00612 00613 int trim_j0 = ( int ) ( ( ( long long int ) seq1_len*proportions[param->getForkId()] ) /sum + 1 ); 00614 int trim_j1 = ( int ) ( ( ( long long int ) seq1_len*proportions[param->getForkId()+1] ) /sum ); 00615 //_job->ram_limit = ( int ) ( ( ( long long int ) _job->ram_limit*weights[param->getForkId()] ) /sum ); 00616 //_job->disk_limit = ( int ) ( ( ( long long int ) _job->disk_limit*weights[param->getForkId()] ) /sum ); 00617 00618 _job->getAlignmentParams()->getSequence(1)->trim(trim_j0, trim_j1); 00619 00620 return 0; 00621 } 00622 00623 void executeTraceback(Job* _job, Timer* timer, int count, int ev_stage2, int ev_stage3, int ev_stage4, int ev_stage5, int ev_stage6) { 00624 for (int id = 0; id < count; id++) { 00625 stage2(_job, id); 00626 timer->eventRecord(ev_stage2); 00627 stage3(_job, id); 00628 timer->eventRecord(ev_stage3); 00629 stage4(_job, id); 00630 timer->eventRecord(ev_stage4); 00631 stage5(_job, id); 00632 timer->eventRecord(ev_stage5); 00633 stage6(_job, id); 00634 timer->eventRecord(ev_stage6); 00635 } 00636 } 00637 00638 /* 00639 * Program entry point. 00640 */ 00641 int libmasa_entry_point(int argc, char** argv, IAligner* aligner, char* aligner_header) { 00642 print_header(aligner_header); 00643 00644 //configs->printFile(stdout); 00645 Job* _job = new Job(SEQUENCES_COUNT); 00646 _job->configs = new Configs(); 00647 00648 //AlignerManager::setAligner(new ExampleAligner()); 00649 00650 AlignmentParams* alignment_params = _job->getAlignmentParams(); 00651 alignment_params->setAlignmentMethod(ALIGNMENT_METHOD_LOCAL); 00652 //alignment_params->setAffineGapPenalties(-DNA_GAP_OPEN, -DNA_GAP_EXT); 00653 //alignment_params->setMatchMismatchScores(DNA_MATCH, DNA_MISMATCH); 00654 const score_params_t* score_params = aligner->getScoreParameters(); 00655 alignment_params->setAffineGapPenalties(-score_params->gap_open, -score_params->gap_ext); 00656 alignment_params->setMatchMismatchScores(score_params->match, score_params->mismatch); 00657 00658 /* Default Values */ 00659 int verbosity = 2; 00660 bool clear_work_directory = false; 00661 int fork_count = NOT_FORKED_INSTANCE; 00662 const int* fork_proportions = NULL; 00663 _job->disk_limit = DEFAULT_DISK_LIMIT; 00664 _job->ram_limit = DEFAULT_RAM_LIMIT; 00665 _job->block_pruning = true; 00666 _job->dump_blocks = false; 00667 _job->setWorkPath ( DEFAULT_WORK_DIRECTORY ); 00668 _job->stage4_maximum_partition_size = DEFAULT_PHASE_3_SIZE; 00669 _job->stage4_orthogonal_execution = true; 00670 _job->stage6_output_format = 0; 00671 //_job->gpu = DETECT_FASTEST_GPU; 00672 //_job->blocks = 0; 00673 _job->flush_column_url = ""; 00674 _job->load_column_url = ""; 00675 _job->alignment_start = AT_ANYWHERE; 00676 _job->alignment_end = AT_ANYWHERE; 00677 _job->max_alignments = DEFAULT_MAX_ALIGNMENTS; 00678 _job->peer_listen_port = -1; 00679 _job->predicted_traceback = false; 00680 _job->setBufferLimit(DEFAULT_BUFFER_LIMIT); 00681 int phase = ALL_STAGES; 00682 int tool = 0; 00683 int trim_start[2] = {0, 0}; 00684 int trim_end[2] = {0, 0}; 00685 int split_step = 0; 00686 int split_count = 0; 00687 int wait_part = -1; 00688 int* split_proportions = NULL; 00689 int alignment_id = 0; 00690 bool clear_n = false; 00691 bool reverse_seq[SEQUENCES_COUNT] = {false, false}; 00692 bool complement_seq[SEQUENCES_COUNT] = {false, false}; 00693 char *fasta_file[SEQUENCES_COUNT]; 00694 00695 //_job->alignerParameter = AlignerFactory::createAlignerParameter(); 00696 //_job->alignerParameter = new ExampleParameters(); 00697 _job->aligner = aligner; 00698 00699 int c; 00700 //string arg_error = ""; 00701 00702 static struct option long_options[] = { 00703 // General Options 00704 {"help", no_argument, 0, ARG_HELP}, 00705 {"work-dir", required_argument, 0, ARG_WORK_DIR}, 00706 {"special-rows-dir", required_argument, 0, ARG_SPECIAL_ROWS_DIR}, 00707 {"shared-dir", required_argument, 0, ARG_SHARED_DIR}, 00708 {"wait-part", required_argument, 0, ARG_WAIT_PART}, 00709 {"clear", no_argument, 0, ARG_CLEAR}, 00710 {"verbose", required_argument, 0, ARG_VERBOSE}, 00711 /*{"gpu", required_argument, 0, ARG_GPU}, 00712 {"list-gpus", no_argument, 0, ARG_LIST_GPUS}, 00713 {"multigpu", no_argument, 0, ARG_MULTIPLE_GPUS},*/ 00714 //{"blocks", required_argument, 0, ARG_BLOCKS}, 00715 {"fork", optional_argument, 0, ARG_FORK}, 00716 00717 // Input Options 00718 {"trim", required_argument, 0, ARG_TRIM}, 00719 {"split", required_argument, 0, ARG_SPLIT}, 00720 {"part", required_argument, 0, ARG_PART}, 00721 {"clear-n", no_argument, 0, ARG_CLEAR_N}, 00722 {"reverse", required_argument, 0, ARG_REVERSE}, 00723 {"complement", required_argument, 0, ARG_COMPLEMENT}, 00724 {"reverse-complement", required_argument, 0, ARG_REVERSE_COMPLEMENT}, 00725 00726 // Input Options 00727 {"alignment-start", required_argument, 0, ARG_ALIGNMENT_START}, 00728 {"alignment-end", required_argument, 0, ARG_ALIGNMENT_END}, 00729 {"alignment-edges", required_argument, 0, ARG_ALIGNMENT_EDGES}, 00730 00731 // Execution Options 00732 {"stage-1", no_argument, 0, ARG_STAGE_1}, 00733 {"no-flush", no_argument, 0, ARG_NO_FLUSH}, 00734 {"disk-size", required_argument, 0, ARG_DISK_SIZE}, 00735 {"ram-size", required_argument, 0, ARG_RAM_SIZE}, 00736 {"flush-column", required_argument, 0, ARG_FLUSH_COLUMN}, 00737 {"load-column", required_argument, 0, ARG_LOAD_COLUMN}, 00738 {"no-block-pruning", no_argument, 0, ARG_NO_BLOCK_PRUNING}, 00739 {"dump-blocks", no_argument, 0, ARG_DUMP_BLOCKS}, 00740 {"alignment-id", required_argument, 0, ARG_ALIGNMENT_ID}, 00741 {"max-alignments", required_argument, 0, ARG_MAX_ALIGNMENTS}, 00742 // Masanet 00743 {"masanet", optional_argument, 0, ARG_MASANET}, 00744 {"masanet-connect", required_argument, 0, ARG_MASANET_CONNECT}, 00745 00746 00747 00748 {"stage-2", no_argument, 0, ARG_STAGE_2}, 00749 {"predicted-traceback", no_argument, 0, ARG_PREDICTED_TRACEBACK}, 00750 00751 {"stage-3", no_argument, 0, ARG_STAGE_3}, 00752 00753 {"stage-4", optional_argument, 0, ARG_STAGE_4}, 00754 {"maximum-partition", required_argument, 0, ARG_MAXIMUM_PARTITION}, 00755 {"not-orthogonal", no_argument, 0, ARG_NOT_ORTHOGONAL}, 00756 00757 {"stage-5", no_argument, 0, ARG_STAGE_5}, 00758 00759 {"stage-6", no_argument, 0, ARG_STAGE_6}, 00760 {"output-format", required_argument, 0, ARG_OUTPUT_FORMAT}, 00761 {"list-formats", no_argument, 0, ARG_LIST_FORMATS}, 00762 00763 // Tools Options 00764 //{"draw-pruning", no_argument, 0, ARG_DRAW_PRUNING}, 00765 {"test", required_argument, 0, ARG_TEST}, 00766 00767 {0, 0, 0, 0} 00768 }; 00769 00770 opterr = 0; // prevent the error message from getopt 00771 00772 try { 00773 while ( 1 ) { 00774 00775 /* getopt_long stores the option index here. */ 00776 int option_index = 0; 00777 00778 c = getopt_long ( argc, argv, ":hd:cv:b:t:s:n123456", 00779 long_options, &option_index ); 00780 char* current_arg = argv[optind-1]; 00781 00782 /* Detect the end of the options. */ 00783 if ( c == -1 ) 00784 break; 00785 00786 //printf("c: %c\n", c); 00787 switch ( c ) { 00788 00789 case ARG_HELP: 00790 show_usage(argv[0], _job); 00791 exit ( 2 ); 00792 break; 00793 case ARG_WORK_DIR: 00794 try { 00795 _job->setWorkPath( optarg ); 00796 } catch (IllegalArgumentException& err) { 00797 throw IllegalArgumentException(err.getErr().c_str(), current_arg); 00798 } 00799 break; 00800 case ARG_SPECIAL_ROWS_DIR: 00801 try { 00802 _job->setSpecialRowsPath( optarg ); 00803 } catch (IllegalArgumentException& err) { 00804 throw IllegalArgumentException(err.getErr().c_str(), current_arg); 00805 } 00806 break; 00807 case ARG_SHARED_DIR: 00808 try { 00809 _job->setSharedPath( optarg ); 00810 } catch (IllegalArgumentException& err) { 00811 throw IllegalArgumentException(err.getErr().c_str(), current_arg); 00812 } 00813 break; 00814 case ARG_WAIT_PART: 00815 try { 00816 wait_part = atoi ( optarg ); 00817 } catch (IllegalArgumentException& err) { 00818 throw IllegalArgumentException(err.getErr().c_str(), current_arg); 00819 } 00820 break; 00821 case ARG_CLEAR: 00822 clear_work_directory = true; 00823 break; 00824 case ARG_VERBOSE: 00825 verbosity = atoi ( optarg ); 00826 if ( verbosity < 0 || verbosity > 2) { 00827 throw IllegalArgumentException("Verbosity must lie in range [0..2].", current_arg); 00828 } 00829 break; 00830 case ARG_FORK: 00831 if (optarg == NULL) { 00832 fork_proportions = aligner->getForkWeights(); 00833 fork_count = 0; 00834 while (fork_proportions[fork_count] != 0) { // zero-terminated vector 00835 fork_count++; 00836 } 00837 } else { 00838 int* weights; 00839 fork_count = parse_proportions(optarg, &weights, "Fork count", current_arg); 00840 if (weights == NULL) { 00841 fork_proportions = aligner->getForkWeights(); 00842 } else { 00843 fork_proportions = weights; 00844 } 00845 /*if (strchr(optarg, '/') != NULL) { 00846 char* tok = strtok(optarg, "/"); 00847 fork_count = 0; 00848 const int MAX_FORK = 8; 00849 int* weights = new int[MAX_FORK]; 00850 while (tok != NULL && fork_count < (MAX_FORK-1)) { 00851 sscanf(tok, "%d", &weights[fork_count++]); 00852 tok = strtok(NULL, "/"); 00853 } 00854 weights[fork_count] = 0; 00855 fork_proportions = weights; 00856 } else { 00857 sscanf( optarg, "%d", &fork_count); 00858 if (fork_count <= 0) { 00859 throw IllegalArgumentException("Fork count must be positive.", current_arg); 00860 } 00861 fork_proportions = aligner->getForkWeights(); 00862 }*/ 00863 } 00864 break; 00865 case ARG_TRIM: 00866 if ( optarg != NULL ) { 00867 sscanf ( optarg, "%d,%d,%d,%d", 00868 &trim_start[0], &trim_end[0], &trim_start[1], &trim_end[1] ); 00869 if ((trim_end[0] > 0 && trim_start[0] >= trim_end[0]) 00870 || (trim_end[1] > 0 && trim_start[1] >= trim_end[1])) { 00871 throw IllegalArgumentException("--trim ranges cannot be decreasing.", current_arg); 00872 } 00873 } 00874 break; 00875 case ARG_SPLIT: 00876 if ( optarg != NULL ) { 00877 int* weights; 00878 split_count = parse_proportions(optarg, &weights, "Split count", current_arg); 00879 if (weights == NULL) { 00880 split_proportions = new int[split_count+1]; 00881 for (int i=0; i<split_count; i++) { 00882 split_proportions[i] = 1; 00883 } 00884 split_proportions[split_count] = 0; 00885 } else { 00886 split_proportions = weights; 00887 } 00888 00889 /*int step; 00890 int count; 00891 sscanf ( optarg, "%d/%d", &split_step, &split_count ); 00892 if ( split_step > split_count || split_step <= 0 ) { 00893 throw IllegalArgumentException("SPLIT_STEP must lie in range [1..SPLIT_COUNT].", current_arg); 00894 }*/ 00895 } else { 00896 throw IllegalArgumentException("Inform --split parameters.", current_arg); 00897 } 00898 break; 00899 case ARG_PART: 00900 if ( optarg != NULL ) { 00901 sscanf ( optarg, "%d", &split_step ); 00902 } else { 00903 throw IllegalArgumentException("Inform --part parameter.", current_arg); 00904 } 00905 break; 00906 case ARG_CLEAR_N: 00907 clear_n = true; 00908 break; 00909 case ARG_REVERSE: 00910 if ( !parse_sequence_flags ( optarg, reverse_seq ) ) { 00911 throw IllegalArgumentException("Wrong reverse argument. Choose 'none', '1', '2' or 'both'.", current_arg); 00912 } 00913 break; 00914 case ARG_COMPLEMENT: 00915 if ( !parse_sequence_flags ( optarg, complement_seq ) ) { 00916 throw IllegalArgumentException("Wrong complement argument. Choose 'none', '1', '2' or 'both'.", current_arg); 00917 } 00918 break; 00919 case ARG_REVERSE_COMPLEMENT: 00920 if ( !parse_sequence_flags ( optarg, complement_seq ) ) { 00921 throw IllegalArgumentException("Wrong reverse-complement argument. Choose 'none', '1', '2' or 'both'.", current_arg); 00922 } else { 00923 reverse_seq[0] = complement_seq[0]; 00924 reverse_seq[1] = complement_seq[1]; 00925 } 00926 break; 00927 00928 case ARG_ALIGNMENT_START: 00929 if ( !parse_alignment_flags ( optarg[0], &_job->alignment_start ) ) { 00930 throw IllegalArgumentException("Wrong alignment start argument. Choose '*', '1', '2', '3' or '+'.", current_arg); 00931 } 00932 break; 00933 case ARG_ALIGNMENT_END: 00934 if ( !parse_alignment_flags ( optarg[0], &_job->alignment_end ) ) { 00935 throw IllegalArgumentException("Wrong alignment end argument. Choose '*', '1', '2', '3' or '+'.", current_arg); 00936 } 00937 break; 00938 case ARG_ALIGNMENT_EDGES: 00939 if ( !parse_alignment_flags ( optarg[0], &_job->alignment_start ) ) { 00940 throw IllegalArgumentException("Wrong alignment start argument. Choose '*', '1', '2', '3' or '+'.", current_arg); 00941 } 00942 if ( !parse_alignment_flags ( optarg[1], &_job->alignment_end ) ) { 00943 throw IllegalArgumentException("Wrong alignment end argument. Choose '*', '1', '2', '3' or '+'.", current_arg); 00944 } 00945 break; 00946 case ARG_STAGE_1: 00947 phase = STAGE_1; 00948 break; 00949 case ARG_NO_FLUSH: 00950 _job->disk_limit = NO_FLUSH; 00951 _job->ram_limit = NO_FLUSH; 00952 break; 00953 case ARG_NO_BLOCK_PRUNING: 00954 _job->block_pruning = false; 00955 break; 00956 case ARG_DUMP_BLOCKS: 00957 _job->dump_blocks = true; 00958 break; 00959 case ARG_DISK_SIZE: 00960 if ( _job->disk_limit != NO_FLUSH ) { 00961 _job->disk_limit = parse_size(optarg, current_arg); 00962 } 00963 break; 00964 case ARG_RAM_SIZE: 00965 if ( _job->ram_limit != NO_FLUSH ) { 00966 _job->ram_limit = parse_size(optarg, current_arg); 00967 } 00968 break; 00969 case ARG_FLUSH_COLUMN: 00970 _job->flush_column_url = optarg; 00971 _job->block_pruning = false; // TODO 00972 fprintf(stderr, "Warning: Block Pruning is not compatible with multigpus yet.\n"); 00973 break; 00974 case ARG_LOAD_COLUMN: 00975 _job->load_column_url = optarg; 00976 _job->block_pruning = false; // TODO 00977 fprintf(stderr, "Warning: Block Pruning is not compatible with multigpus yet.\n"); 00978 break; 00979 00980 case ARG_ALIGNMENT_ID: 00981 sscanf ( optarg, "%d", &alignment_id ); 00982 if ( alignment_id < 0 ) { 00983 throw IllegalArgumentException("Wrong alignment id.", current_arg); 00984 } 00985 break; 00986 case ARG_MAX_ALIGNMENTS: 00987 sscanf ( optarg, "%d", &_job->max_alignments ); 00988 if (_job->max_alignments < 0) { 00989 throw IllegalArgumentException("Wrong max alignment.", current_arg); 00990 } 00991 break; 00992 00993 case ARG_MASANET: 00994 if ( optarg != NULL ) { 00995 _job->peer_listen_port = atoi ( optarg ); 00996 } else { 00997 _job->peer_listen_port = 0; 00998 } 00999 break; 01000 case ARG_MASANET_CONNECT: 01001 _job->peer_connect = optarg; 01002 break; 01003 01004 case ARG_STAGE_2: 01005 phase = STAGE_2; 01006 break; 01007 01008 case ARG_PREDICTED_TRACEBACK: 01009 _job->predicted_traceback = true; 01010 break; 01011 01012 case ARG_STAGE_3: 01013 phase = STAGE_3; 01014 break; 01015 case ARG_MAXIMUM_PARTITION: 01016 sscanf ( optarg, "%d", &_job->stage4_maximum_partition_size ); 01017 if (_job->stage4_maximum_partition_size < 1) { 01018 throw IllegalArgumentException("Maximum partition size too small.", current_arg); 01019 } 01020 break; 01021 case ARG_NOT_ORTHOGONAL: 01022 _job->stage4_orthogonal_execution = false; 01023 break; 01024 01025 case ARG_STAGE_4: 01026 phase = STAGE_4; 01027 break; 01028 01029 case ARG_STAGE_5: 01030 phase = STAGE_5; 01031 break; 01032 01033 case ARG_STAGE_6: 01034 phase = STAGE_6; 01035 break; 01036 case ARG_DRAW_PRUNING: 01037 tool = TOOL_DRAW_PRUNING; 01038 break; 01039 case ARG_LIST_FORMATS: 01040 print_output_formats(); 01041 exit ( 1 ); 01042 break; 01043 case ARG_OUTPUT_FORMAT: 01044 _job->stage6_output_format = -1; 01045 for ( int id=0; stage6_formats[id].name != NULL; id++ ) { 01046 if ( strcasecmp ( optarg, stage6_formats[id].name ) ==0 ) { 01047 _job->stage6_output_format = id; 01048 } 01049 } 01050 if ( _job->stage6_output_format == -1 ) { 01051 stringstream out; 01052 out << "Wrong output format: " << optarg << "."; 01053 throw IllegalArgumentException(out.str().c_str(), current_arg); 01054 } 01055 break; 01056 case ARG_TEST: { 01057 //AlignerTester* tester = new AlignerTester(aligner); 01058 //return tester->test(optarg); 01059 throw IllegalArgumentException("Not Implemented."); 01060 } break; 01061 case ':': 01062 throw IllegalArgumentException("An argument must be supplied to this parameter.", current_arg); 01063 break; 01064 case '?': { 01065 int ret = _job->aligner->getParameters()->processArgument(argc, argv); 01066 if (ret == ARGUMENT_ERROR_NOT_FOUND) { 01067 throw IllegalArgumentException("Invalid Option.", current_arg); 01068 } else if (ret == ARGUMENT_ERROR_NO_OPTION) { 01069 throw IllegalArgumentException("An argument must be supplied to this parameter.", current_arg); 01070 } else if (ret > 0) { 01071 throw IllegalArgumentException("Unhandled argument.", current_arg); 01072 } else if (ret) { 01073 throw IllegalArgumentException(_job->aligner->getParameters()->getLastError(), current_arg); 01074 } 01075 } break; 01076 default: 01077 abort (); 01078 break; 01079 } 01080 } 01081 01082 /* Mandatory file names */ 01083 if (_job->peer_listen_port < 0) { 01084 if (argc - optind == 2 ) { 01085 fasta_file[0] = argv[optind++]; 01086 fasta_file[1] = argv[optind++]; 01087 } else { 01088 throw IllegalArgumentException("Supply two fasta files."); 01089 } 01090 } 01091 } catch (IllegalArgumentException& err) { 01092 fprintf(stderr, "%s", err.what()); 01093 fprintf(stderr, "See `%s --help' for more information.\n", argv[0]); 01094 exit(2); 01095 } 01096 01097 if (_job->peer_listen_port >=0 ) { 01098 MasaNet* peer = new MasaNet(TYPE_PROCESSING_NODE, "MASA-extension"); 01099 peer->startServer(_job->peer_listen_port); 01100 if (_job->peer_connect.length() > 0) { 01101 string address = _job->peer_connect; 01102 if (peer->connectToPeer(address, CONNECTION_TYPE_CTRL) == NULL) { 01103 printf("MasaNet Connection Error.\n"); 01104 exit(-1); 01105 } 01106 } 01107 sleep(600); 01108 } 01109 01110 /* Loads both sequences */ 01111 /*for (int i=0; i<2; i++) { 01112 load_sequence (alignment_params->getSeq(i), clear_n, reverse_seq[i], complement_seq[i], fasta_file[i]); 01113 }*/ 01114 01115 01116 Timer timer; 01117 int ev_start = timer.createEvent("START"); 01118 int ev_seqs = timer.createEvent("SEQUENCES"); 01119 int ev_init = timer.createEvent("INIT"); 01120 int ev_stage1 = timer.createEvent("STAGE1"); 01121 int ev_stage2 = timer.createEvent("STAGE2"); 01122 int ev_stage3 = timer.createEvent("STAGE3"); 01123 int ev_stage4 = timer.createEvent("STAGE4"); 01124 int ev_stage5 = timer.createEvent("STAGE5"); 01125 int ev_stage6 = timer.createEvent("STAGE6"); 01126 01127 timer.eventRecord(ev_start); 01128 01129 01130 for (int i=0; i<2; i++) { 01131 SequenceInfo* sequenceInfo = new SequenceInfo(); 01132 sequenceInfo->setFilename(fasta_file[i]); 01133 01134 SequenceModifiers* modifiers = new SequenceModifiers(); 01135 if (i == 0) { 01136 modifiers->setClearN(clear_n); 01137 } else { 01138 modifiers->setClearN(false); 01139 } 01140 modifiers->setReverse(reverse_seq[i]); 01141 modifiers->setComplement(complement_seq[i]); 01142 modifiers->setTrimStart(trim_start[i]); 01143 modifiers->setTrimEnd(trim_end[i]); 01144 01145 Sequence* sequence = new Sequence(sequenceInfo, modifiers); 01146 _job->addSequence(sequence); 01147 01148 alignment_params->addSequence(sequence); 01149 } 01150 01151 //_job->setSequence(seq0, trim_i0, trim_i1, clear_n, reverse_seq[0], complement_seq[0]); 01152 01153 /* Fork processes if using multiple gpus */ 01154 if (split_count > 0) { 01155 split_sequences ( _job, split_step, split_count, split_proportions, wait_part ); 01156 } 01157 01158 timer.eventRecord(ev_seqs); 01159 01160 01161 if ( fork_count != NOT_FORKED_INSTANCE) { 01162 if (phase == ALL_STAGES) { 01163 //fprintf(stderr, "Warning: only Stage 1 will be executed in forked processes.\n"); 01164 //phase = STAGE_1; 01165 } else if (phase != STAGE_1) { 01166 fprintf(stderr, "FATAL: only Stage 1 is supported in forked processes.\n"); 01167 exit(1); 01168 } 01169 /*if (_job->disk_limit != NO_FLUSH || _job->ram_limit != NO_FLUSH) { 01170 fprintf(stderr, "Warning: disabling flushing rows in forked processes.\n"); 01171 _job->disk_limit = NO_FLUSH; 01172 _job->ram_limit = NO_FLUSH; 01173 }*/ 01174 if (_job->block_pruning) { 01175 fprintf(stderr, "Warning: disabling Block Pruning in forked processes.\n"); 01176 _job->block_pruning = false; 01177 //_job->disk_limit = NO_FLUSH; 01178 //_job->ram_limit = NO_FLUSH; 01179 } 01180 01181 fork_multi_process ( fork_count, _job, fork_proportions, split_step ); 01182 } 01183 01184 alignment_params->printParams(stdout); 01185 01186 /* Job initialization */ 01187 if ( !_job->initialize() ) { // TODO throw exception 01188 fprintf(stderr, "Error during Job initialization\n"); 01189 exit ( 1 ); 01190 } 01191 01192 FILE* aligner_stats = _job->fopenStatistics(ALIGNER_STATISTICS, 0); 01193 print_header(aligner_header, aligner_stats); 01194 fprintf(aligner_stats, "%s", argv[0]); 01195 for (int i=1; i<argc; i++) { 01196 fprintf(aligner_stats, " %s", argv[i]); 01197 } 01198 fprintf(aligner_stats, "\n\n"); 01199 aligner->printInitialStatistics(aligner_stats); 01200 fflush(aligner_stats); 01201 01202 timer.eventRecord(ev_init); 01203 01204 01205 /* Job Execution */ 01206 01207 if ( phase == ALL_STAGES ) { 01208 int count = stage1 ( _job ); 01209 timer.eventRecord(ev_stage1); 01210 if (_job->getAlignerPool() == NULL) { 01211 executeTraceback(_job, &timer, count, ev_stage2, ev_stage3, ev_stage4, ev_stage5, ev_stage6); 01212 } else { 01213 fprintf(stderr, "FATAL: Pool aligners is only supported with Stage 1 yet.\n"); 01214 exit(1); 01215 } 01216 01217 } else if ( phase == STAGE_1 ) { 01218 stage1 ( _job ); 01219 timer.eventRecord(ev_stage1); 01220 } else if ( phase == STAGE_2 ) { 01221 stage2 ( _job, alignment_id ); 01222 timer.eventRecord(ev_stage2); 01223 } else if ( phase == STAGE_3 ) { 01224 stage3 ( _job, alignment_id ); 01225 timer.eventRecord(ev_stage3); 01226 } else if ( phase == STAGE_4 ) { 01227 stage4 ( _job, alignment_id ); 01228 timer.eventRecord(ev_stage4); 01229 } else if ( phase == STAGE_5 ) { 01230 stage5 ( _job, alignment_id ); 01231 timer.eventRecord(ev_stage5); 01232 } else if ( phase == STAGE_6 ) { 01233 stage6 ( _job, alignment_id ); 01234 timer.eventRecord(ev_stage6); 01235 } 01236 01237 FILE* stats = _job->fopenStatistics(STAGE_GLOBAL, 0); 01238 double size = ((double)_job->getSequence(0)->getLen())*_job->getSequence(1)->getLen(); 01239 01240 float diff = timer.printStatistics(stats); 01241 fprintf(stats, " Total: %.4f\n", diff); 01242 fprintf(stats, " Matrix: %.4e\n", size); 01243 fprintf(stats, " MCUPS: %.4f\n", size/1000000.0f/(diff/1000.0f)); 01244 01245 fclose(stats); 01246 01247 aligner->finalize(); 01248 aligner->printFinalStatistics(aligner_stats); 01249 fclose(aligner_stats); 01250 01251 /* Job finalization */ 01252 /*if (multiple_fork) { 01253 wait(); 01254 }*/ 01255 01256 // TODO ugly! 01257 delete _job->getSequence(0)->getModifiers(); 01258 delete _job->getSequence(1)->getModifiers(); 01259 delete _job->getSequence(0)->getInfo(); 01260 delete _job->getSequence(1)->getInfo(); 01261 delete _job->getSequence(0); 01262 delete _job->getSequence(1); 01263 delete _job->configs; 01264 delete _job; 01265 01266 exit ( 0 ); 01267 } 01268 01269 01270 01271
1.7.6.1