Diff of /inst/aminolookup.m [000000] .. [3f48a5]  Maximize  Restore

  Switch to unified view

a b/inst/aminolookup.m
1
## Copyright (C) 2008 Bill Denney
2
##
3
## This software is free software; you can redistribute it and/or modify it
4
## under the terms of the GNU General Public License as published by
5
## the Free Software Foundation; either version 3 of the License, or (at
6
## your option) any later version.
7
##
8
## This software is distributed in the hope that it will be useful, but
9
## WITHOUT ANY WARRANTY; without even the implied warranty of
10
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
## General Public License for more details.
12
##
13
## You should have received a copy of the GNU General Public License
14
## along with this software; see the file COPYING.  If not, see
15
## <http://www.gnu.org/licenses/>.
16
17
## -*- texinfo -*-
18
## @deftypefn {Function File} {} aminolookup ()
19
## @deftypefnx {Function File} {@var{aminodesc} =} aminolookup (@var{seq})
20
## @deftypefnx {Function File} {@var{aminodesc} =} aminolookup (@var{searchtype}, @var{seq})
21
## Convert between amino acid representations.  The types of input are
22
##
23
## @itemize @bullet
24
## @item Name
25
##
26
## The amino acid name
27
## @item Code
28
##
29
## The amino acid single letter code
30
## @item Abbreviation
31
##
32
## The three letter abbreviation for the amino acid
33
## @item Integer
34
##
35
## The number representation of the amino acid
36
## @end itemize
37
##
38
## To see the full list of each of the above, run this function without
39
## any arguments or outputs.
40
##
41
42
## If called with zero inputs, this will display the mapping between
43
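## the above types.  If called with one input, it will convert to code by
## default or, when @var{seq} is itself a string of codes, to abbreviations.
## If called with two inputs, @var{searchtype} names the representation of
## @var{seq} and the other representations of that amino acid are returned.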
44
45
## @seealso{aa2int,int2aa,int2nt,nt2int}
46
## @end deftypefn
47
48
## Author: Bill Denney <bill@denney.ws>
49
50
function result = aminolookup (varargin)

  ## Usage summary:
  ##   aminolookup ()                  print the full mapping table
  ##   aminolookup (seq)               convert a whole sequence: codes ->
  ##                                   abbreviations, abbreviations or
  ##                                   integers -> codes
  ##   aminolookup (searchtype, seq)   describe one amino acid given in the
  ##                                   representation named by searchtype
  ##                                   ("integer", "code", "abbreviation"
  ##                                   or "name"); an empty seq returns the
  ##                                   list of all known values of that type
  ##
  ## Errors are raised for multi-row input, for symbols that are not in the
  ## tables and for unknown search types.

  ## The lookup tables are built on the first call and cached.  Element
  ## (or row) i of every table describes the same amino acid.
  persistent code num abbr name seq

  if (isempty (code))
    code = "ARNDCQEGHILKMFPSTWYVBZX*-";
    num = 1:numel (code);
    abbr = {"Ala" "Arg" "Asn" "Asp" "Cys" "Gln" "Glu" "Gly" "His" "Ile" ...
            "Leu" "Lys" "Met" "Phe" "Pro" "Ser" "Thr" "Trp" "Tyr" "Val" ...
            "Asx" "Glx" "Xaa" "END" "GAP"};

    ## Column 1 holds the primary name, columns 2 and 3 hold alternate
    ## names ("" where there is none).
    name = {"Alanine" "Arginine" "Asparagine" "Aspartic acid" "Cysteine" ...
            "Glutamine" "Glutamic acid" "Glycine" "Histidine" ...
            "Isoleucine" "Leucine" "Lysine" "Methionine" "Phenylalanine" ...
            "Proline" "Serine" "Threonine" "Tryptophan" "Tyrosine" ...
            "Valine" "Asparagine" "Glutamine" "Any amino acid" ...
            "Termination codon (translation stop)" ...
            "Gap of unknown length"}.';
    name = [name, repmat({""}, numel (code), 2)];
    ## Alternate names
    name{4,2} = "Aspartate";
    name{7,2} = "Glutamate";
    name{21,2} = "Aspartic acid";
    name{22,2} = "Glutamic acid";
    name{21,3} = "Aspartate";
    name{22,3} = "Glutamate";

    seq = {"GCU GCC GCA GCG" "CGU CGC CGA CGG AGA AGG" "AAU AAC" "GAU GAC" ...
           "UGU UGC" "CAA CAG" "GAA GAG" "GGU GGC GGA GGG" "CAU CAC" ...
           "AUU AUC AUA" "UUA UUG CUU CUC CUA CUG" "AAA AAG" "AUG" ...
           "UUU UUC" "CCU CCC CCA CCG" "UCU UCC UCA UCG AGU AGC" ...
           "ACU ACC ACA ACG" "UGG" "UAU UAC" "GUU GUC GUA GUG" ...
           "AAU AAC GAU GAC" "CAA CAG GAA GAG" "All codons" "UAA UAG UGA" ...
           "NA"};
  endif

  if (nargin == 0)
    ## Display the mapping as a left-aligned table with the columns
    ## name(s), integer, code and codons.
    n = cell (rows (name), 4);
    n(:,1) = name(:,1);
    for i = 1:rows (name)
      for j = 2:columns (name)
        if (! isempty (name{i,j}))
          n{i,1} = sprintf ("%s or %s", n{i,1}, name{i,j});
        endif
      endfor
      n{i,2} = num2str (i);
      n{i,3} = code(i);
      n{i,4} = seq{i};
    endfor
    s = max (cellfun (@numel, n));
    fmt = sprintf ("%%-%ds ", s(1), s(2), s(3), s(4));
    for i = 1:rows (n)
      printf ([fmt "\n"], n{i,:});
    endfor
    ## Nothing to convert; stop here instead of falling through to the
    ## conversion code, which needs an input value.
    return;
  endif

  if (nargin == 1)
    showmany = true;
    value = varargin{1};
    searchtype = "";
    if (isnumeric (value))
      ## this is an extension of the matlab options
      searchtype = "integer";
    elseif (ischar (value))
      ## If the number of characters is divisible by 3 and exactly every
      ## third character (1, 4, 7, ...) is upper case, treat the input as
      ## abbreviations.  isequal is used because the two index vectors
      ## generally differ in length.
      if ((mod (numel (value), 3) == 0) ...
          && isequal (find (isupper (value(:).')), 1:3:numel (value)))
        searchtype = "abbreviation";
      else
        searchtype = "code";
      endif
    endif
  else
    showmany = false;
    if (! ischar (varargin{1}))
      error ("aminolookup: searchtype must be a string");
    endif
    searchtype = lower (varargin{1});
    value = varargin{2};
  endif

  if (rows (value) > 1)
    error ("aminolookup: value may only be one row")
  endif

  if (showmany)
    ## We need to convert many inputs into one output.  First convert
    ## the input value into the integer form; entries that cannot be
    ## matched stay at -1.
    switch (searchtype)
      case "code"
        value = upper (value);
        newvalue = -ones (size (value));
        for i = 1:numel (code)
          newvalue(value == code(i)) = i;
        endfor
        outtype = "abbreviation";
      case "abbreviation"
        newvalue = -ones (1, numel (value) / 3);
        for i = 1:numel (newvalue)
          idx = find (strcmp (value(3*i-2:3*i), abbr), 1);
          if (! isempty (idx))
            newvalue(i) = idx;
          endif
        endfor
        outtype = "code";
      case "integer"
        newvalue = value;
        outtype = "code";
      otherwise
        error (["aminolookup: cannot convert multiple arguments of any " ...
                "type but code, abbreviation, or integer"]);
    endswitch

    ## Every entry must be a whole number that indexes the tables.
    bad = ((newvalue < 1) | (newvalue > numel (num)) ...
           | (newvalue != fix (newvalue)));
    if (any (bad(:)))
      error ("aminolookup: unrecognised symbol in input at position %d",
             find (bad, 1));
    endif

    switch (outtype)
      case "code"
        result = code(newvalue);
      case "abbreviation"
        ## the leading "" keeps the result a string for empty input
        result = ["" abbr{newvalue}];
      otherwise
        error ("aminolookup: invalid output type")
    endswitch
  else
    ## We're only describing one value.  An empty value returns the list
    ## of everything known for that search type.
    idx = [];
    switch (searchtype)
      case "integer"
        if (isempty (value))
          result = num;
          return;
        elseif (isnumeric (value) && isscalar (value))
          idx = find (value == num, 1);
        endif
      case "code"
        if (isempty (value))
          result = code;
          return;
        elseif (ischar (value) && isscalar (value))
          idx = find (upper (value) == code, 1);
        endif
      case "abbreviation"
        if (isempty (value))
          result = abbr;
          return;
        elseif (ischar (value))
          idx = find (strcmpi (value, abbr), 1);
        endif
      case "name"
        if (isempty (value))
          result = name;
          return;
        elseif (ischar (value))
          ## name is a matrix of primary and alternate names; only the
          ## row of the first match matters.
          [idx, unused] = find (strcmpi (value, name), 1);
        endif
      otherwise
        error ("aminolookup: invalid search type, %s", searchtype)
    endswitch

    if (isempty (idx))
      error ("aminolookup: unrecognised %s in input", searchtype);
    endif

    ## Report code, abbreviation and primary name, leaving out the
    ## representation that was searched for (nothing is left out for
    ## an integer search).
    fields = {code(idx), abbr{idx}, name{idx,1}};
    fields(strcmp (searchtype, {"code" "abbreviation" "name"})) = [];
    result = sprintf ("%s ", fields{:});
    result = result(1:end-1);
  endif

endfunction

## Tests
%!shared code, abbr, ints
%! code = "MWKQAEDIRDIYDF";
%! abbr = "MetTrpLysGlnAlaGluAspIleArgAspIleTyrAspPhe";
%! ints = [13 18 12 6 1 7 4 10 2 4 10 19 4 14];
%!assert (aminolookup(code), abbr)
%!assert (aminolookup(abbr), code)
%!assert (aminolookup(ints), code)
%!assert (aminolookup("Code", "R"), "Arg Arginine")
%!assert (aminolookup("Integer", 1), "A Ala Alanine")
%!assert (aminolookup("Abbreviation", "asn"), "N Asparagine")
%!assert (aminolookup("Name", "proline"), "P Pro")
%!assert (aminolookup("ARNDCQ"), "AlaArgAsnAspCysGln")
%!assert (aminolookup("Integer", []), 1:25)
%!assert (aminolookup("Name", "Glutamate"), "E Glu")
%!error <unrecognised symbol> aminolookup("AJ")
%!error <unrecognised symbol> aminolookup([1 26])
%!error <unrecognised symbol> aminolookup("AlaFoo")
%!error <unrecognised code> aminolookup("Code", "J")
%!error <invalid search type> aminolookup("bogus", 1)

Get latest updates about Open Source Projects, Conferences and News.

Sign up for the SourceForge newsletter:





No, thanks