/********* Sequence input routines for CLUSTAL W *******************/ /* DES was here. FEB. 1994 */ /* Now reads PILEUP/MSF and CLUSTAL alignment files */ #include #include #include #include #include "clustalw.h" #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static sint get_seq_index(SEQ *seqs,sint nseqs,sint ii); /* * Global variables */ static sint debug; Boolean open_alignment_output(char *filename,ALNOUT_OPTPTR alnout_opt) { if(!alnout_opt->output_clustal && !alnout_opt->output_nbrf && !alnout_opt->output_gcg && !alnout_opt->output_phylip && !alnout_opt->output_gde) { error("You must select an alignment output format"); return FALSE; } if(alnout_opt->output_clustal) if((alnout_opt->clustal_outfile=open_output_file(filename, "CLUSTAL", "aln", alnout_opt->clustal_outname))==NULL) return FALSE; if(alnout_opt->output_nbrf) if((alnout_opt->nbrf_outfile=open_output_file(filename, "NBRF", "pir", alnout_opt->nbrf_outname))==NULL) return FALSE; if(alnout_opt->output_gcg) if((alnout_opt->gcg_outfile=open_output_file(filename, "GCG", "msf", alnout_opt->gcg_outname))==NULL) return FALSE; if(alnout_opt->output_phylip) if((alnout_opt->phylip_outfile=open_output_file(filename, "PHYLIP", "phy", alnout_opt->phylip_outname))==NULL) return FALSE; if(alnout_opt->output_gde) if((alnout_opt->gde_outfile=open_output_file(filename, "GDE", "gde", alnout_opt->gde_outname))==NULL) return FALSE; return TRUE; } /* gets a filename from the user and opens the file returns the file handle (the filename is written into char *file_name) */ FILE * open_output_file(char *in_name, char *prompt, char *file_extension, char *out_name) { char temp[FILENAMELEN+1]; char path[FILENAMELEN+1]; char local_prompt[MAXLINE]; FILE * file_handle; Boolean usemenu; /* if the output filename is already specified, just open the file and return */ if (out_name[0]!=EOS) { file_handle = open_explicit_file(out_name); return file_handle; } usemenu=get_usemenu(); /* otherwise, suggest a name to the user and prompt if this is ok */ get_path(in_name,path); strcpy(out_name,path); strcat(out_name,file_extension); if(strcmp(out_name,in_name)==0) { warning("Output file name is the same as input file."); if (usemenu) { strcpy(local_prompt,"\n\nEnter new name to avoid overwriting "); strcat(local_prompt," [%s]: "); fprintf(stdout,local_prompt,out_name); gets(temp); if(*temp != EOS) strcpy(out_name,temp); } } else if (usemenu) { strcpy(local_prompt,"\nEnter a name for the "); strcat(local_prompt,prompt); strcat(local_prompt," output file "); strcat(local_prompt," [%s]: "); fprintf(stdout,local_prompt,out_name); gets(temp); if(*temp != EOS) strcpy(out_name,temp); } #ifdef VMS if((file_handle=fopen(out_name,"w","rat=cr","rfm=var"))==NULL) { #else if((file_handle=fopen(out_name,"w"))==NULL) { #endif error("Cannot open output file [%s]",out_name); return NULL; } return file_handle; } FILE * open_explicit_file(char *file_name) { FILE * file_handle; if (*file_name == EOS) { error("Bad output file [%s]",file_name); return NULL; } #ifdef VMS if((file_handle=fopen(file_name,"w","rat=cr","rfm=var"))==NULL) { #else if((file_handle=fopen(file_name,"w"))==NULL) { #endif error("Cannot open output file [%s]",file_name); return NULL; } return file_handle; } void create_alignment_output(ALN mult_aln,sint fseq, sint nseq,ALNOUT_OPT alnout_opt,SS_OPT ss_opt) { sint i,length; Boolean usemenu; length=0; for (i=fseq;i0.5 and weak score =<0.5. Strong matching columns to be assigned ':' and weak matches assigned '.' in the clustal output format. */ char *res_cat1[] = { "sta", "neqk", "nhqk", "ndeq", "qhrk", "milv", "milf", "hy", "fyw", NULL }; char *res_cat2[] = { "csa", "atv", "sag", "stnk", "stpa", "sgnd", "sndeqk", "ndeqhk", "neqhrk", "fvlim", "hfy", NULL }; seq_no = (sint *)ckalloc((nseq+1) * sizeof(sint)); print_seq_no = (sint *)ckalloc((nseq+1) * sizeof(sint)); max_aln_length=max_names=0; for (i=fseq;imax_aln_length) max_aln_length=mult_aln.seqs[i].len; if(strlen(mult_aln.seqs[i].name)>max_names) max_names=strlen(mult_aln.seqs[i].name); print_seq_no[i] = seq_no[i] = 0; for(j=0;j LINELENGTH) line_length=LINELENGTH; chunks = len/line_length; if(len % line_length != 0) ++chunks; for(lv1=0;lv1max_aln_length) max_aln_length=mult_aln.seqs[i].len; if(strlen(mult_aln.seqs[i].name)>max_names) max_names=strlen(mult_aln.seqs[i].name); } seq = (char *)ckalloc((max_aln_length+1) * sizeof(char)); all_checks = (sint *)ckalloc((nseq+1) * sizeof(sint)); for(i=fseq; i10) { warn=FALSE; for(i=fseq; imax_aln_length) max_aln_length=mult_aln.seqs[i].len; if(strlen(mult_aln.seqs[i].name)>max_names) max_names=strlen(mult_aln.seqs[i].name); } seq = (char *)ckalloc((max_aln_length+1) * sizeof(char)); /* decide the line length for this alignment - maximum is LINELENGTH */ line_length=PAGEWIDTH-max_names; line_length=line_length-line_length % 10; /* round to a multiple of 10*/ if (line_length > LINELENGTH) line_length=LINELENGTH; for(ii=fseq; iiDL;" : ">P1;"); fprintf(nbout, "%s\n%s\n", mult_aln.seqs[i].name, mult_aln.seqs[i].title); slen = 0; for(j=fres; jmax_aln_length) max_aln_length=mult_aln.seqs[i].len; if(strlen(mult_aln.seqs[i].name)>max_names) max_names=strlen(mult_aln.seqs[i].name); } seq = (char *)ckalloc((max_aln_length+1) * sizeof(char)); /* decide the line length for this alignment - maximum is LINELENGTH */ line_length=PAGEWIDTH-max_names; line_length=line_length-line_length % 10; /* round to a multiple of 10*/ if (line_length > LINELENGTH) line_length=LINELENGTH; if (mult_aln.prf1.ss.struct_penalties == SECST && ss_opt.use_ss1 == TRUE) { ss_mask1=calc_sec_struct_mask(mult_aln.seqs[0].len,mult_aln.prf1.ss.sec_struct_mask,ss_opt); } if (mult_aln.prf2.ss.struct_penalties == SECST && ss_opt.use_ss2 == TRUE) { ss_mask2=calc_sec_struct_mask(mult_aln.seqs[mult_aln.prf1.nseqs].len,mult_aln.prf2.ss.sec_struct_mask,ss_opt); } for(ii=fseq; ii=prf_length || (tolower(mask[i+j]) != 'a' && mask[i+j] != '$')) break; tmp_mask[i+j] = 'a'; } i += j; while (tolower(mask[i]) == 'a' || mask[i] == '$') { if (i>=prf_length) break; if (mask[i] == '$') { tmp_mask[i] = 'A'; i++; break; } else tmp_mask[i] = mask[i]; i++; } for (j = 0; j=0) && (tolower(mask[i-j-1]) == 'a' || mask[i-j-1] == '$')) tmp_mask[i-j-1] = 'a'; } } else if (tolower(mask[i]) == 'b' || mask[i] == '%') { for (j = 0; j=prf_length || (tolower(mask[i+j]) != 'b' && mask[i+j] != '%')) break; tmp_mask[i+j] = 'b'; } i += j; while (tolower(mask[i]) == 'b' || mask[i] == '%') { if (i>=prf_length) break; if (mask[i] == '%') { tmp_mask[i] = 'B'; i++; break; } else tmp_mask[i] = mask[i]; i++; } for (j = 0; j=0) && (tolower(mask[i-j-1]) == 'b' || mask[i-j-1] == '%')) tmp_mask[i-j-1] = 'b'; } } else i++; } return tmp_mask; } /* calculate a gap penalty mask from the secondary structures in the input file */ void calc_gap_penalty_mask(int prf_length, char *ss_mask, char *gap_mask, SS_OPT ss_opt) { int i; char *struct_mask; /* first calculate a temp secondary structure mask */ struct_mask = calc_sec_struct_mask(prf_length,ss_mask,ss_opt); /* then the gap penalty mask */ for(i=0;i