[b8d151]: inst / cleave.m Maximize Restore History

Download this file

cleave.m    86 lines (78 with data), 3.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
## Copyright (C) 2008 Bill Denney
##
## This software is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## This software is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this software; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} {@var{fragments} =} cleave (@var{sequence}, @var{pattern}, @var{position})
## @deftypefnx {Function File} {[@var{fragments}, @var{cuttingsites}] =} cleave (@dots{})
## @deftypefnx {Function File} {[@var{fragments}, @var{cuttingsites}, @var{lengths}] =} cleave (@dots{})
## @deftypefnx {Function File} {[@dots{}] =} cleave (@dots{}, "PartialDigest", @var{prob})
## Cleave a peptide @var{sequence} using the @var{pattern} at the
## @var{position} relative to the pattern. The @var{sequence} is a
## sequence of amino acids; the @var{pattern} is a regular expression to
## find the location of the cleavage; and the @var{position} is the
## position relative to that regular expression (0 is immediately to the
## left of the first character, 1 is immediately to the right of the
## first character, @dots{}).
##
## The "PartialDigest" option is not yet implemented; passing any
## additional argument currently raises an error.
##
## @multitable
## @item Protease @tab Peptide Pattern @tab Position
## @item Trypsin @tab [KR](?!P) @tab 1
## @item Chymotrypsin @tab [WYF](?!P) @tab 1
## @item Glutamine C @tab [ED](?!P) @tab 1
## @item Lysine C @tab [K](?!P) @tab 1
## @item Aspartic acid N @tab D @tab 1
## @end multitable
##
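## @example
## @group
## fragments = cleave ("AKGRC", "[KR](?!P)", 1)
## @result{} fragments = @{"AK", "GR", "C"@}
## @end group
## @end example
##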
## @seealso{rebasecuts, restrict, seqshowwords, regexp}
## @end deftypefn
## Author: Bill Denney <bill@denney.ws>
function [fragments, s, lengths] = cleave (seq, pattern, pos, varargin)
  ## Split the single-row sequence SEQ at every match of the regular
  ## expression PATTERN.  Each cut is placed POS characters to the right
  ## of the start of a match (0 cuts just before the first matched
  ## character, 1 just after it).
  ##
  ## Outputs:
  ##   fragments  cell row of the pieces of SEQ, in order
  ##   s          0-based offset at which each fragment starts (s(1) is 0)
  ##   lengths    number of characters in each fragment
  ##
  ## Raises an error when fewer than three arguments are given, when SEQ
  ## does not have exactly one row, or when any extra argument is passed
  ## (PartialDigest is not implemented).

  if (nargin < 3)
    print_usage ();
  endif

  ## Accept a structure that carries the residues in its "sequence" field.
  if (isstruct (seq))
    seq = seq.sequence;
  endif

  if (rows (seq) != 1)
    error ("cleave: the sequence must have a single row");
  endif

  if (! isempty (varargin))
    ## FIXME: add support for partialdigest
    error ("cleave: additional parameters are not yet supported");
  endif

  seqlen = length (seq);

  ## regexp returns the start index of every match; shift by POS to get
  ## the number of characters lying to the left of each cut.
  cuts = pos - 1 + regexp (seq, pattern);

  ## Only cuts strictly inside the sequence split anything.  A cut at
  ## offset 0 or at the very end coincides with an existing terminus,
  ## and one beyond the end does not exist; keeping them would produce
  ## empty fragments or negative lengths.
  cuts = cuts(cuts > 0 & cuts < seqlen);

  s = [0 cuts];

  ## Each fragment runs from its own offset up to the next one (or up to
  ## the end of the sequence for the last fragment).
  lengths = [s(2:end) seqlen] - s;

  fragments = cell (size (s));
  for i = 1:numel (s)
    fragments{i} = seq(s(i)+1:s(i)+lengths(i));
  endfor
endfunction
## Tests
%!test
%! ## Tryptic digest: cut after every K or R that is not followed by P.
%! ## The "TRPEDFR" fragment contains an R-P pair that must stay uncut,
%! ## and the sequence ends in R, which must not add a trailing cut site.
%! site = [0 6 7 41 46 67 74 80 92 100];
%! len = [6 1 34 5 21 7 6 12 8 10];
%! frag = {"MGTGGR" "R" "GAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIR" "NNLTR" ...
%!         "LHELENCSVIEGHLQILLMFK" "TRPEDFR" "DLSFPK" "LIMITDYLLLFR" ...
%!         "VYGLESLK" "DLFPNLTVIR"};
%! seq = strcat (frag{:});
%! [tfrag, tsite, tlen] = cleave (seq, '[KR](?!P)', 1);
%! assert (tfrag, frag)
%! assert (tsite, site)
%! assert (tlen, len)

## Extra arguments (e.g. PartialDigest) are rejected until implemented.
%!error <not yet supported> cleave ("AKGRC", "[KR](?!P)", 1, "PartialDigest", 0.5)
## FIXME: This needs a test for partialdigest.