#include #include #include #include #include "clustalw.h" /* * Prototypes */ static sint read_dialign_list(FILE *fd,MOTIF *motifs); static sint read_ballast_list(FILE *fd,MOTIF *motifs,sint nseqs,SEQ *seqs,sint max,Boolean propogate); static sint start_pos(char *seq,sint p); static sint next_pos(char *seq,sint len,sint p); static sint debug=1; sint read_motif_list(char *filename,MOTIF *motifs,sint nseqs,SEQ *seqs,Boolean propogate) { sint nmotifs=0; float max; FILE *fd; char inline1[1024]; if ((fd=fopen(filename,"r"))==NULL) { error("cannot open motif file %s", filename); return((sint)0); } if (fgets(inline1,1024,fd) != NULL) { if(keyword(inline1,"Ballast")) { sscanf(&inline1[7],"%f",&max); nmotifs=read_ballast_list(fd,motifs,nseqs,seqs,(sint)max,propogate); } else { rewind(fd); nmotifs=read_dialign_list(fd,motifs); } } fclose(fd); return nmotifs; } static sint read_ballast_list(FILE *fd,MOTIF *motifs,sint nseqs,SEQ *seqs,sint max,Boolean propogate) { sint i,j,n,nmotifs=0; sint seq1,seq2,pos1,pos2,len; char name1[MAXNAMES],name2[MAXNAMES]; char s1[1024],s2[1024],s3[1024]; float weight; char inline1[1024]; char *tmp; while (fgets(inline1,1024,fd) != NULL) { if((tmp=strstr(inline1,"seq:"))!=NULL) { tmp+=4; if((n=sscanf(tmp,"%s %s %s %d %d %s %d %s %f\n", name1,name2,s1,&pos1,&pos2,s2,&len,s3,&weight))!=9) { continue; } seq1=-1; for(i=0;i3) { weight*=(float)max/(float)nseqs; if(weight>100) { printf("%s %s %s %d %d %s %d %s %f\n",name1,name2,s1,pos1,pos2,s2,len,s3,weight); motifs[nmotifs].weight=(weight/30.0)*INT_SCALE_FACTOR; if(nmotifs>=MAXMOTIFS-1) { warning("Superceded max motifs (= %d)\n",MAXMOTIFS); return nmotifs; } nmotifs++; } } } } fprintf(stdout,"nmotifs %d\n",nmotifs); /* extrapolate the motifs through the other sequences */ if(propogate) { n=0; for(i=0;i=MAXMOTIFS-1) { warning("Superceded max motifs (= %d)\n",MAXMOTIFS); return nmotifs+n; } } } } } return nmotifs+n; } static sint read_dialign_list(FILE *fd,MOTIF *motifs) { sint n,nmotifs=0; sint seq1,seq2,pos1,pos2,len; char s1[1024],s2[1024],s3[1024]; float weight; char inline1[1024]; char *tmp; while (fgets(inline1,1024,fd) != NULL) { if((tmp=strstr(inline1,"seq:"))!=NULL) { tmp+=4; if((n=sscanf(tmp,"%d %d %s %d %d %s %d %s %f\n", &seq1,&seq2,s1,&pos1,&pos2,s2,&len,s3,&weight))!=9) { error("Wrong format in motif list file\n"); fclose(fd); return((sint)0); } motifs[nmotifs].seq1=seq1; motifs[nmotifs].seq2=seq2; motifs[nmotifs].pos1=pos1; motifs[nmotifs].pos2=pos2; motifs[nmotifs].len=len; /*if(weight>10 && len>5) {*/ if(weight>0 && len>5) { motifs[nmotifs].weight=10.0*weight*INT_SCALE_FACTOR; if(nmotifs>=MAXMOTIFS-1) { warning("Superceded max motifs (= %d)\n",MAXMOTIFS); return nmotifs; } nmotifs++; } } } return nmotifs; } void calc_motif_scores(SEQ *seqs, sint nseqs, sint *group, MOTIF *motifs, sint nmotifs, sint **motif_score, sint prf_length1, sint prf_length2) { char c; sint i,j, m,n1,n2; sint seq1, seq2, pos1, pos2, len, weight; sint ix1,ix2; for (i=0; i=1) fprintf(stdout,"nmotifs %d\n",nmotifs); /* check the list of motifs */ for(m=0;m=1) fprintf(stdout,"1. %d %d %d %d %d %d weight %d\n",seq1,seq2,pos1,pos2,ix1,ix2,weight); for(i=0;i=prf_length1) break; if(ix2>=prf_length2) break; motif_score[ix1][ix2]+=weight; ix1=next_pos(seqs[seq1].data,prf_length1,ix1); ix2=next_pos(seqs[seq2].data,prf_length2,ix2); } } else if(group[seq2]==1 && group[seq1]==2) { ix1=start_pos(seqs[seq1].data,pos1); ix2=start_pos(seqs[seq2].data,pos2); weight=motifs[m].weight/n2; if(debug>=1) fprintf(stdout,"2. %d %d %d %d %d %d weight %d\n",seq1,seq2,pos1,pos2,ix1,ix2,weight); for(i=0;i=prf_length2) break; if(ix2>=prf_length1) break; motif_score[ix2][ix1]+=weight; ix1=next_pos(seqs[seq1].data,prf_length2,ix1); ix2=next_pos(seqs[seq2].data,prf_length1,ix2); } } } if(debug>=2) { fprintf(stdout,"\n"); for(i=0;i=p) break; } return i; } static sint next_pos(char *seq,sint len,sint p) { p++; if(p>=len) return (len-1); while(seq[p]=='.' && p=1) fprintf(stdout,"nmotifs %d\n",nmotifs); /* check the list of motifs */ for(m=0;m=1) fprintf(stdout,"1. %d %d %d %d %d %d weight %d\n",s1,s2,pos1,pos2,ix1,ix2,motifs[m].weight); for(i=0;i=len1) break; if(ix2>=len2) break; /* if (motifs[m].weight>motif_score[ix1][ix2]) */ motif_score[ix1][ix2]+=motifs[m].weight; ix1=next_pos(seqs[s1].data,len1,ix1); ix2=next_pos(seqs[s2].data,len2,ix2); } } else if(motifs[m].seq1-1 == s2 && motifs[m].seq2-1 == s1) { ix1=start_pos(seqs[s1].data,pos1); ix2=start_pos(seqs[s2].data,pos2); if(debug>=1) fprintf(stdout,"2. %d %d %d %d %d %d weight %d\n",s1,s2,pos1,pos2,ix1,ix2,motifs[m].weight); for(i=0;i=len2) break; if(ix2>=len1) break; /* if (motifs[m].weight>motif_score[ix2][ix1]) */ motif_score[ix2][ix1]+=motifs[m].weight; ix1=next_pos(seqs[s1].data,len2,ix1); ix2=next_pos(seqs[s2].data,len1,ix2); } } } if(debug>=2) { fprintf(stdout,"\n"); for(i=0;i