MASA-Core
Defines | Functions
libmasa.cpp File Reference
#include "libmasa.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <string.h>
#include <errno.h>
#include <sys/wait.h>
#include <unistd.h>
#include "../common/Common.hpp"
#include "../stage1/sw_stage1.h"
#include "../stage2/sw_stage2.h"
#include "../stage3/sw_stage3.h"
#include "../stage4/sw_stage4.h"
#include "../stage5/sw_stage5.h"
#include "../stage6/sw_stage6.h"
#include "../masanet/MasaNet.hpp"
#include "../config.h"
#include <sstream>

Go to the source code of this file.

Defines

#define DEFAULT_RAM_LIMIT   (0)
 Default amount of disk/ram space used for flushing special lines.
#define DEFAULT_FLUSH_RAM_STRING   "0"
#define DEFAULT_DISK_LIMIT   (0)
#define DEFAULT_FLUSH_DISK_STRING   "0"
#define MAX_POSSIBLE_FORK   (0)
 Fork the maximum number of processes supported by the Aligner.
#define NO_FLUSH   (-1)
 Constant used to represent that no special lines will be flushed.
#define ALL_STAGES   (0)
 Constant used to execute all stages.
#define DEFAULT_WORK_DIRECTORY   "./work.tmp"
 The default working directory for temporary data.
#define DEFAULT_PHASE_3_SIZE   16
#define DEFAULT_MPS_STRING   "16"
#define DEFAULT_MAX_ALIGNMENTS   1
#define DEFAULT_MAX_ALIGNMENTS_STRING   "1"
#define DEFAULT_BUFFER_LIMIT   (1024*1024)
#define SEQUENCES_COUNT   (2)
 Only pairwise sequence alignment is supported.
#define ARG_HELP   'h'
 Optarg: command line parameters.
#define ARG_WORK_DIR   'd'
#define ARG_CLEAR   'c'
#define ARG_VERBOSE   'v'
#define ARG_SPECIAL_ROWS_DIR   0x8003
#define ARG_SHARED_DIR   0x8004
#define ARG_WAIT_PART   0x8005
#define ARG_FORK   0x8006
#define ARG_TRIM   't'
#define ARG_SPLIT   's'
#define ARG_PART   0x9005
#define ARG_CLEAR_N   0x9006
#define ARG_REVERSE   0x9007
#define ARG_COMPLEMENT   0x9008
#define ARG_REVERSE_COMPLEMENT   0x9009
#define ARG_ALIGNMENT_START   0x9101
#define ARG_ALIGNMENT_END   0x9102
#define ARG_ALIGNMENT_EDGES   0x9103
#define ARG_STAGE_1   '1'
#define ARG_NO_FLUSH   'n'
#define ARG_NO_BLOCK_PRUNING   'p'
#define ARG_DUMP_BLOCKS   0x1007
#define ARG_DISK_SIZE   0x1008
#define ARG_RAM_SIZE   0x1009
#define ARG_FLUSH_COLUMN   0x1010
#define ARG_LOAD_COLUMN   0x1011
#define ARG_ALIGNMENT_ID   0x1012
#define ARG_MAX_ALIGNMENTS   0x1013
#define ARG_MASANET   0x1014
#define ARG_MASANET_CONNECT   0x1015
#define ARG_STAGE_2   '2'
#define ARG_PREDICTED_TRACEBACK   0x2011
#define ARG_STAGE_3   '3'
#define ARG_MAXIMUM_PARTITION   0x3011
#define ARG_NOT_ORTHOGONAL   0x3012
#define ARG_STAGE_4   '4'
#define ARG_STAGE_5   '5'
#define ARG_STAGE_6   '6'
#define ARG_OUTPUT_FORMAT   0x6013
#define ARG_LIST_FORMATS   0x6014
#define ARG_DRAW_PRUNING   0x7015
#define ARG_TEST   0x7016
#define TOOL_DRAW_PRUNING   (1)
#define MASA_HEADER   "\Linked with MASA - Malleable Architecture for Sequence Aligners - "PACKAGE_VERSION"\n\University of Brasilia/UnB - Brazil \n\Copyright (c) 2010-2015 Edans Sandes - License GPLv3\n\This program comes with ABSOLUTELY NO WARRANTY.\n\\n"
 Header.
#define USAGE   "\Usage: %s [OPTIONS] [FASTA FILE #1] [FASTA FILE #2] \n\\n\FASTA FILES: Supply two sequences in fasta format files. \n\\n\\n\\033[1mGeneral Options:\033[0m\n\\n\-h, --help Shows this help.\n\-d, --work-dir=DIR Directory used to store files produced by the stages.\n\ Default: "DEFAULT_WORK_DIRECTORY" \n\--special-rows-dir=DIR Directory used to store the special rows produced by\n\ the gpu stages. The default is to use a subfolder of\n\ the work directory (see --work-dir parameter).\n\--shared-dir=DIR Directory used to share data between forked instances.\n\--wait-part=PART Process will wait until the conclusion of --part=PART.\n\-c, --clear Clears the work directory before any computation. This \n\ prevents the continuation of previously interrupted \n\ execution.\n\-v, --verbose=LEVEL Shows informative output during computation. \n\ 0: Silently;\n\ 1: Only shows error messages;\n\ 2: (Default) Shows progress and statistics; \n\ 3: Gives full output data.\n\--fork Fork many processes in order to optimize performance. \n\--fork=COUNT Fork with a limited number of processes.\n\--fork=W1,W2,...,Wn Fork with the given weight proportions.\n\\n\\n\\033[1mInput Options:\033[0m\n\\n\-t, --trim=I0,I1,J0,J1 Trims sequence #1 from position I0 to I1 (inclusive). \n\ and sequence #2 from position J0 to J1 (inclusive). \n\ Zero represents either first and last positions. \n\ This parameter is ignored if used together with the \n\ --split parameter. \n\--split=COUNT Splits sequence #2 in COUNT equal segments. This \n\ parameter must be used together with the \n\ --part parameter. \n\--split=W1,W2,...,Wn Splits sequence #2 in n segments with weighted \n\ proportions. This parameter must be used \n\ together with the --part parameter. \n\--part=PART When the --split parameter is used, the sequence #2 is \n\ divided in many parts. The --part parameter selects \n\ which part will be executed by this process. 
\n\ If the --load-columns and --flush-columns parameters\n\ are not set, then the last column will be saved into\n\ a file in the current directory. \n\--clear-n Remove all 'N' characters on both fasta files.\n\--reverse=[1|2|both] Reverse strands of sequence 1, 2 or both. \n\--complement=[1|2|both] Generate complement (AT,CG) for sequence 1, 2 or both. \n\--reverse-complement=[1|2|both] \n\ Generate reverse-complement (opposite strand) for \n\ sequence 1, 2 or both. This parameter joins the \n\ --reverse and --complement parameters. \n\\n\\033[1mAlignment Type:\033[0m\n\\n\--alignment-start=[*|1|2|3|+] \n\--alignment-end= [*|1|2|3|+] \n\--alignment-edges=[*|1|2|3|+][*|1|2|3|+] (start,end)\n\ Defines where the alignment can start or end. \n\ - *: any location.\n\ - 1: start/end of sequence 1. \n\ - 2: start/end of sequence 2. \n\ - 3: start/end of sequences 1 or 2. \n\ - +: start/end of sequences 1 and 2. \n\\n\\033[1mStage Options:\033[0m\n\\n\\033[1mStage #1 Options:\033[0m\n\-1, --stage-1 Executes only the stage #1 of algorithm, i.e., returns \n\ the best score and its coordinates. Special rows \n\ are stored in disk to allow the execution of the \n\ subsequent stages.\n\-n, --no-flush Do not save special rows. Using this option \n\ in stage #1 will prevent the execution of subsequent\n\ phases.\n\-p, --no-block-pruning Does not use the block pruning optimization \n\\n\--disk-size=SIZE Limits the disk/ram size available to the special rows.\n\--ram-size=SIZE The SIZE parameter may contain suffix M (e.g., 500M)\n\ or G (e.g., 10G). This option is ignored if used\n\ together with the --no-flush parameter. \n\ Default values: "DEFAULT_FLUSH_RAM_STRING"/"DEFAULT_FLUSH_DISK_STRING".\n\--flush-column=URL Store the last column cells in some destination. The \n\ URL is given in some of these formats: \n\ file://PATH_TO_FILE \n\ socket://0.0.0.0:LISTENING_PORT \n\--load-column=URL Loads the first column cells from some destination. 
The\n\ URL is given in some of these formats: \n\ file://PATH_TO_FILE \n\ socket://HOSTNAME:PORT \n\--dump-blocks Saves the result of each block in the alignment file. \n\--max-alignments Maximum number of alignments to return. Default:"DEFAULT_MAX_ALIGNMENTS_STRING".\n\\n\\033[1mStage #2 Options:\033[0m\n\-2, --stage-2 Executes only the stage #2 of algorithm, i.e., returns \n\ a list of crosspoints inside the optimal alignment. \n\ Special columns are stored in disk to allow the \n\ execution of the subsequent stages. The disk size \n\ available to store the special columns may be \n\ configured using the --disk-space parameter. \n\\n\\033[1mStage #3 Options:\033[0m\n\-3, --stage-3 Executes only the stage #3 of algorithm, i.e., returns \n\ a bigger list of crosspoints inside the optimal \n\ alignment.\n\\n\\033[1mStage #4 Options:\033[0m\n\-4, --stage-4 Executes only the stage #4 of algorithm, i.e., given a \n\ list of coordinates of the optimal alignment, \n\ increases the number of crosspoint using \n\ Myers and Miller's algorithm, until all the \n\ partitions are smaller than the maximum partition \n\ size.\n\--maximum-partition=SIZE \n\ Defines the maximum partition size allowed as output \n\ of the stage #4. This parameter limits the size of \n\ partitions processed in stage #5. \n\ Default Value: "DEFAULT_MPS_STRING" \n\--not-orthogonal Does not use the orthogonal execution otimization. \n\\n\\033[1mStage #5 Options:\033[0m\n\-5, --stage-5 Executes only the stage #5 of algorithm, i.e., given \n\ a list of coordinates of the optimal alignment, \n\ returns the full alignment (as binary output). \n\\n\\033[1mStage #6 Options:\033[0m\n\-6, --stage-6 Executes only the stage #6 of algorithm, i.e., given \n\ an alignment in binary format, returns the full \n\ alignment in the format defined in with the \n\ --output-format argument.\n\--output-format=FORMAT Selects the output format of the full alignment \n\ in stage #6. 
Possibile formats may be listed with \n\ the --list-formats parameter. \n\--list-formats Lists all the possible output formats for stage #6. \n\\n\\n\"
 Usage string to be shown in help.

Functions

void executeTraceback (Job *_job, Timer *timer, int count, int ev_stage2, int ev_stage3, int ev_stage4, int ev_stage5, int ev_stage6)
int libmasa_entry_point (int argc, char **argv, IAligner *aligner, char *aligner_header)
 Entry point for the MASA architecture.

Define Documentation

#define ALL_STAGES   (0)

Constant used to execute all stages.

Definition at line 70 of file libmasa.cpp.

#define ARG_ALIGNMENT_EDGES   0x9103

Definition at line 132 of file libmasa.cpp.

#define ARG_ALIGNMENT_END   0x9102

Definition at line 131 of file libmasa.cpp.

#define ARG_ALIGNMENT_ID   0x1012

Definition at line 144 of file libmasa.cpp.

#define ARG_ALIGNMENT_START   0x9101

Definition at line 130 of file libmasa.cpp.

#define ARG_CLEAR   'c'

Definition at line 109 of file libmasa.cpp.

#define ARG_CLEAR_N   0x9006

Definition at line 124 of file libmasa.cpp.

#define ARG_COMPLEMENT   0x9008

Definition at line 126 of file libmasa.cpp.

#define ARG_DISK_SIZE   0x1008

Definition at line 140 of file libmasa.cpp.

#define ARG_DRAW_PRUNING   0x7015

Definition at line 163 of file libmasa.cpp.

#define ARG_DUMP_BLOCKS   0x1007

Definition at line 139 of file libmasa.cpp.

#define ARG_FLUSH_COLUMN   0x1010

Definition at line 142 of file libmasa.cpp.

#define ARG_FORK   0x8006

Definition at line 118 of file libmasa.cpp.

#define ARG_HELP   'h'

Optarg: command line parameters.

Characters are short options. Hexadecimals are long options.

Definition at line 107 of file libmasa.cpp.

#define ARG_LIST_FORMATS   0x6014

Definition at line 160 of file libmasa.cpp.

#define ARG_LOAD_COLUMN   0x1011

Definition at line 143 of file libmasa.cpp.

#define ARG_MASANET   0x1014

Definition at line 147 of file libmasa.cpp.

#define ARG_MASANET_CONNECT   0x1015

Definition at line 148 of file libmasa.cpp.

#define ARG_MAX_ALIGNMENTS   0x1013

Definition at line 145 of file libmasa.cpp.

#define ARG_MAXIMUM_PARTITION   0x3011

Definition at line 154 of file libmasa.cpp.

#define ARG_NO_BLOCK_PRUNING   'p'

Definition at line 138 of file libmasa.cpp.

#define ARG_NO_FLUSH   'n'

Definition at line 137 of file libmasa.cpp.

#define ARG_NOT_ORTHOGONAL   0x3012

Definition at line 155 of file libmasa.cpp.

#define ARG_OUTPUT_FORMAT   0x6013

Definition at line 159 of file libmasa.cpp.

#define ARG_PART   0x9005

Definition at line 123 of file libmasa.cpp.

#define ARG_PREDICTED_TRACEBACK   0x2011

Definition at line 151 of file libmasa.cpp.

#define ARG_RAM_SIZE   0x1009

Definition at line 141 of file libmasa.cpp.

#define ARG_REVERSE   0x9007

Definition at line 125 of file libmasa.cpp.

#define ARG_REVERSE_COMPLEMENT   0x9009

Definition at line 127 of file libmasa.cpp.

#define ARG_SHARED_DIR   0x8004

Definition at line 116 of file libmasa.cpp.

#define ARG_SPECIAL_ROWS_DIR   0x8003

Definition at line 115 of file libmasa.cpp.

#define ARG_SPLIT   's'

Definition at line 122 of file libmasa.cpp.

#define ARG_STAGE_1   '1'

Definition at line 136 of file libmasa.cpp.

#define ARG_STAGE_2   '2'

Definition at line 150 of file libmasa.cpp.

#define ARG_STAGE_3   '3'

Definition at line 153 of file libmasa.cpp.

#define ARG_STAGE_4   '4'

Definition at line 156 of file libmasa.cpp.

#define ARG_STAGE_5   '5'

Definition at line 157 of file libmasa.cpp.

#define ARG_STAGE_6   '6'

Definition at line 158 of file libmasa.cpp.

#define ARG_TEST   0x7016

Definition at line 164 of file libmasa.cpp.

#define ARG_TRIM   't'

Definition at line 121 of file libmasa.cpp.

#define ARG_VERBOSE   'v'

Definition at line 110 of file libmasa.cpp.

#define ARG_WAIT_PART   0x8005

Definition at line 117 of file libmasa.cpp.

#define ARG_WORK_DIR   'd'

Definition at line 108 of file libmasa.cpp.

#define DEFAULT_BUFFER_LIMIT   (1024*1024)

Definition at line 93 of file libmasa.cpp.

#define DEFAULT_DISK_LIMIT   (0)

Definition at line 54 of file libmasa.cpp.

#define DEFAULT_FLUSH_DISK_STRING   "0"

Definition at line 55 of file libmasa.cpp.

#define DEFAULT_FLUSH_RAM_STRING   "0"

Definition at line 53 of file libmasa.cpp.

#define DEFAULT_MAX_ALIGNMENTS   1

Definition at line 86 of file libmasa.cpp.

#define DEFAULT_MAX_ALIGNMENTS_STRING   "1"

Definition at line 87 of file libmasa.cpp.

#define DEFAULT_MPS_STRING   "16"

Definition at line 81 of file libmasa.cpp.

#define DEFAULT_PHASE_3_SIZE   16

Definition at line 80 of file libmasa.cpp.

#define DEFAULT_RAM_LIMIT   (0)

Default amount of disk/ram space used for flushing special lines.

Definition at line 52 of file libmasa.cpp.

#define DEFAULT_WORK_DIRECTORY   "./work.tmp"

The default working directory for temporary data.

Definition at line 75 of file libmasa.cpp.

#define MASA_HEADER   "\Linked with MASA - Malleable Architecture for Sequence Aligners - "PACKAGE_VERSION"\n\University of Brasilia/UnB - Brazil \n\Copyright (c) 2010-2015 Edans Sandes - License GPLv3\n\This program comes with ABSOLUTELY NO WARRANTY.\n\\n"

Header.

Definition at line 173 of file libmasa.cpp.

#define MAX_POSSIBLE_FORK   (0)

Fork the maximum number of processes supported by the Aligner.

Definition at line 60 of file libmasa.cpp.

#define NO_FLUSH   (-1)

Constant used to represent that no special lines will be flushed.

Definition at line 65 of file libmasa.cpp.

#define SEQUENCES_COUNT   (2)

Only pairwise sequence alignment is supported.

Definition at line 100 of file libmasa.cpp.

#define TOOL_DRAW_PRUNING   (1)

Definition at line 167 of file libmasa.cpp.

#define USAGE   "\Usage: %s [OPTIONS] [FASTA FILE #1] [FASTA FILE #2] \n\\n\FASTA FILES: Supply two sequences in fasta format files. \n\\n\\n\\033[1mGeneral Options:\033[0m\n\\n\-h, --help Shows this help.\n\-d, --work-dir=DIR Directory used to store files produced by the stages.\n\ Default: "DEFAULT_WORK_DIRECTORY" \n\--special-rows-dir=DIR Directory used to store the special rows produced by\n\ the gpu stages. The default is to use a subfolder of\n\ the work directory (see --work-dir parameter).\n\--shared-dir=DIR Directory used to share data between forked instances.\n\--wait-part=PART Process will wait until the conclusion of --part=PART.\n\-c, --clear Clears the work directory before any computation. This \n\ prevents the continuation of previously interrupted \n\ execution.\n\-v, --verbose=LEVEL Shows informative output during computation. \n\ 0: Silently;\n\ 1: Only shows error messages;\n\ 2: (Default) Shows progress and statistics; \n\ 3: Gives full output data.\n\--fork Fork many processes in order to optimize performance. \n\--fork=COUNT Fork with a limited number of processes.\n\--fork=W1,W2,...,Wn Fork with the given weight proportions.\n\\n\\n\\033[1mInput Options:\033[0m\n\\n\-t, --trim=I0,I1,J0,J1 Trims sequence #1 from position I0 to I1 (inclusive). \n\ and sequence #2 from position J0 to J1 (inclusive). \n\ Zero represents either first and last positions. \n\ This parameter is ignored if used together with the \n\ --split parameter. \n\--split=COUNT Splits sequence #2 in COUNT equal segments. This \n\ parameter must be used together with the \n\ --part parameter. \n\--split=W1,W2,...,Wn Splits sequence #2 in n segments with weighted \n\ proportions. This parameter must be used \n\ together with the --part parameter. \n\--part=PART When the --split parameter is used, the sequence #2 is \n\ divided in many parts. The --part parameter selects \n\ which part will be executed by this process. 
\n\ If the --load-columns and --flush-columns parameters\n\ are not set, then the last column will be saved into\n\ a file in the current directory. \n\--clear-n Remove all 'N' characters on both fasta files.\n\--reverse=[1|2|both] Reverse strands of sequence 1, 2 or both. \n\--complement=[1|2|both] Generate complement (AT,CG) for sequence 1, 2 or both. \n\--reverse-complement=[1|2|both] \n\ Generate reverse-complement (opposite strand) for \n\ sequence 1, 2 or both. This parameter joins the \n\ --reverse and --complement parameters. \n\\n\\033[1mAlignment Type:\033[0m\n\\n\--alignment-start=[*|1|2|3|+] \n\--alignment-end= [*|1|2|3|+] \n\--alignment-edges=[*|1|2|3|+][*|1|2|3|+] (start,end)\n\ Defines where the alignment can start or end. \n\ - *: any location.\n\ - 1: start/end of sequence 1. \n\ - 2: start/end of sequence 2. \n\ - 3: start/end of sequences 1 or 2. \n\ - +: start/end of sequences 1 and 2. \n\\n\\033[1mStage Options:\033[0m\n\\n\\033[1mStage #1 Options:\033[0m\n\-1, --stage-1 Executes only the stage #1 of algorithm, i.e., returns \n\ the best score and its coordinates. Special rows \n\ are stored in disk to allow the execution of the \n\ subsequent stages.\n\-n, --no-flush Do not save special rows. Using this option \n\ in stage #1 will prevent the execution of subsequent\n\ phases.\n\-p, --no-block-pruning Does not use the block pruning optimization \n\\n\--disk-size=SIZE Limits the disk/ram size available to the special rows.\n\--ram-size=SIZE The SIZE parameter may contain suffix M (e.g., 500M)\n\ or G (e.g., 10G). This option is ignored if used\n\ together with the --no-flush parameter. \n\ Default values: "DEFAULT_FLUSH_RAM_STRING"/"DEFAULT_FLUSH_DISK_STRING".\n\--flush-column=URL Store the last column cells in some destination. The \n\ URL is given in some of these formats: \n\ file://PATH_TO_FILE \n\ socket://0.0.0.0:LISTENING_PORT \n\--load-column=URL Loads the first column cells from some destination. 
The\n\ URL is given in some of these formats: \n\ file://PATH_TO_FILE \n\ socket://HOSTNAME:PORT \n\--dump-blocks Saves the result of each block in the alignment file. \n\--max-alignments Maximum number of alignments to return. Default:"DEFAULT_MAX_ALIGNMENTS_STRING".\n\\n\\033[1mStage #2 Options:\033[0m\n\-2, --stage-2 Executes only the stage #2 of algorithm, i.e., returns \n\ a list of crosspoints inside the optimal alignment. \n\ Special columns are stored in disk to allow the \n\ execution of the subsequent stages. The disk size \n\ available to store the special columns may be \n\ configured using the --disk-space parameter. \n\\n\\033[1mStage #3 Options:\033[0m\n\-3, --stage-3 Executes only the stage #3 of algorithm, i.e., returns \n\ a bigger list of crosspoints inside the optimal \n\ alignment.\n\\n\\033[1mStage #4 Options:\033[0m\n\-4, --stage-4 Executes only the stage #4 of algorithm, i.e., given a \n\ list of coordinates of the optimal alignment, \n\ increases the number of crosspoint using \n\ Myers and Miller's algorithm, until all the \n\ partitions are smaller than the maximum partition \n\ size.\n\--maximum-partition=SIZE \n\ Defines the maximum partition size allowed as output \n\ of the stage #4. This parameter limits the size of \n\ partitions processed in stage #5. \n\ Default Value: "DEFAULT_MPS_STRING" \n\--not-orthogonal Does not use the orthogonal execution otimization. \n\\n\\033[1mStage #5 Options:\033[0m\n\-5, --stage-5 Executes only the stage #5 of algorithm, i.e., given \n\ a list of coordinates of the optimal alignment, \n\ returns the full alignment (as binary output). \n\\n\\033[1mStage #6 Options:\033[0m\n\-6, --stage-6 Executes only the stage #6 of algorithm, i.e., given \n\ an alignment in binary format, returns the full \n\ alignment in the format defined in with the \n\ --output-format argument.\n\--output-format=FORMAT Selects the output format of the full alignment \n\ in stage #6. 
Possibile formats may be listed with \n\ the --list-formats parameter. \n\--list-formats Lists all the possible output formats for stage #6. \n\\n\\n\"

Usage string to be shown in help.

Definition at line 185 of file libmasa.cpp.


Function Documentation

void executeTraceback ( Job *  _job,
Timer *  timer,
int  count,
int  ev_stage2,
int  ev_stage3,
int  ev_stage4,
int  ev_stage5,
int  ev_stage6 
)

Definition at line 623 of file libmasa.cpp.

int libmasa_entry_point ( int  argc,
char **  argv,
IAligner *  aligner,
char *  aligner_header = NULL 
)

Entry point for the MASA architecture.

This function must be called in the main procedure of the extension. The main argc/argv parameters must be passed to libmasa_entry_point in order to process the command line parameters.

See also:
The aligner/example/main.cpp source file contains a simple example of calling the libmasa_entry_point function.
Parameters:
argc: number of arguments
argv: command line arguments
aligner: an instance of the IAligner that will execute the alignment procedure.
aligner_header: optional text to be printed in the usage information.
Returns:
exit code.

Definition at line 641 of file libmasa.cpp.