From: Jimmy E. <en...@us...> - 2006-10-26 00:37:57
|
Update of /cvsroot/sashimi/misc In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv12442 Modified Files: digestdb1.c Log Message: revert isascii() to isalpha() for valid sequence characters and also explicitly allow '*' in the sequence (where '*' represents a translated stop codon) Index: digestdb1.c =================================================================== RCS file: /cvsroot/sashimi/misc/digestdb1.c,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** digestdb1.c 24 Oct 2006 22:21:43 -0000 1.5 --- digestdb1.c 26 Oct 2006 00:37:54 -0000 1.6 *************** *** 106,109 **** --- 106,110 ---- printf(" only the 2nd (protein), 3rd (mass), and 5th (peptide) columns, type\n"); printf(" %s ipi.fasta | awk '{print $2 \"\\t\" $3 \"\\t\" $4}' > digest.txt\n\n", argv[0]); + printf(" Asterisks (*) in sequence are treated as proper break points\n\n"); exit(1); } *************** *** 315,319 **** while (cResidue=fgetc(fp)) { ! if (isascii(cResidue)) { pSeq.szSeq[pSeq.iLenSeq]=cResidue; --- 316,320 ---- while (cResidue=fgetc(fp)) { ! if (isalpha(cResidue) || cResidue=='*') { pSeq.szSeq[pSeq.iLenSeq]=cResidue; *************** *** 373,387 **** do { ! if ((strchr(pInput.szBreak, pSeq.szSeq[pPep.iEnd]) && !strchr(pInput.szNoBreak, pSeq.szSeq[pPep.iEnd+1])) || pPep.iEnd==pSeq.iLenSeq-1) { int i; pPep.dPepMass = dMassAA['o']+ 3*dMassAA['h']; for (i=pPep.iStart; i<=pPep.iEnd; i++) { pPep.dPepMass += dMassAA[pSeq.szSeq[i]]; } /* --- 374,394 ---- do { ! if (( (strchr(pInput.szBreak, pSeq.szSeq[pPep.iEnd]) && !strchr(pInput.szNoBreak, pSeq.szSeq[pPep.iEnd+1])) + || pSeq.szSeq[pPep.iEnd]=='*') || pPep.iEnd==pSeq.iLenSeq-1) { int i; + if (pSeq.szSeq[pPep.iEnd]=='*') + pPep.iEnd--; + pPep.dPepMass = dMassAA['o']+ 3*dMassAA['h']; for (i=pPep.iStart; i<=pPep.iEnd; i++) { pPep.dPepMass += dMassAA[pSeq.szSeq[i]]; + //printf("%c", pSeq.szSeq[i]); } + //printf(" %0.2f\n", pPep.dPepMass); /* *************** *** 429,447 **** } ! iMissed++; ! 
if (iMissed==1) /* first break point is start of next peptide */ ! iStartNextPeptide=pPep.iEnd+1; ! ! if (iMissed <= pInput.iMissedCleavage ! && pPep.dPepMass<pInput.dMaxMass ! && pPep.iEnd < pSeq.iLenSeq-1) { ! pPep.iEnd++; } else { ! iMissed=0; ! pPep.iStart=iStartNextPeptide; ! pPep.iEnd=pPep.iStart; } --- 436,473 ---- } ! if (pSeq.szSeq[pPep.iEnd+1] == '*') { ! iMissed=0; ! ! if (pPep.iStart==iStartNextPeptide) ! { ! pPep.iStart=pPep.iEnd+2; ! iStartNextPeptide=pPep.iEnd+2; ! pPep.iEnd=pPep.iStart; ! } ! else ! { ! pPep.iStart=iStartNextPeptide; ! pPep.iEnd=pPep.iStart; ! } } else { ! iMissed++; ! if (iMissed==1) /* first break point is start of next peptide */ ! iStartNextPeptide=pPep.iEnd+1; ! ! if (iMissed <= pInput.iMissedCleavage ! && pPep.dPepMass<pInput.dMaxMass ! && pPep.iEnd < pSeq.iLenSeq-1) ! { ! pPep.iEnd++; ! } ! else ! { ! iMissed=0; ! pPep.iStart=iStartNextPeptide; ! pPep.iEnd=pPep.iStart; ! } } *************** *** 449,452 **** --- 475,479 ---- else { + //printf("%c\n",pSeq.szSeq[pPep.iEnd]); pPep.iEnd++; } |