[r79]: src / classify_m.cc Maximize Restore History

Download this file

classify_m.cc    206 lines (170 with data), 6.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#include <math.h>
#include <string.h>
#include <stdio.h>
#include "full_util.h"
#include "agf_lib.h"
using namespace std;
using namespace libagf;
using namespace libpetey;
int main(int argc, char *argv[]) {
char *testfile; //test data
char *outfile; //output classes
char *confile; //output confidences
FILE *fs;
FILE *logfs=stderr;
nel_ta ntrain; //number of training data points
dim_ta nvar; //number of variables
cls_ta ncls; //number of classes
nel_ta ntest; //number of test data points
real_a **brd; //class border samples
real_a **grd; //gradient at the class border
real_a **test; //test data vectors
cls_ta *result; //results of classification
multiclass_hier<real_a, cls_ta, agf2class<real_a, cls_ta> > *classifier;
int lineno;
real_a *con; //estimated confidence
real_a *pdf=NULL; //conditional probabilities (if applicable)
int errcode;
agf_command_opts opt_args;
//normalization data:
real_a **mat; //transformation matrix
real_a *ave; //constant term
dim_ta nvar1, nvar2;
errcode=0;
errcode=agf_parse_command_opts(argc, argv, "a:nu", &opt_args);
if (errcode==FATAL_COMMAND_OPTION_PARSE_ERROR) return errcode;
//parse the command line arguments:
if (argc != 3) {
FILE *helpfs=stdout;
fprintf(helpfs, "Syntax: classify_m [-u] [-a normfile] control test output\n");
fprintf(helpfs, "\n");
fprintf(helpfs, "where:\n");
fprintf(helpfs, " control control file\n");
fprintf(helpfs, " test file containing vector data to be classified\n");
fprintf(helpfs, " output files containing the results of the classification:\n");
fprintf(helpfs, " .cls for classes, .con for confidence ratings\n");
fprintf(helpfs, "\n");
fprintf(helpfs, "options:\n");
//printf(" -n option to normalise the data\n");
fprintf(helpfs, " -a normfile file containing normalization data (no default--\n");
fprintf(helpfs, " must always be specified explicitly)\n");
fprintf(helpfs, " -u model (borders) data is not normalized\n");
fprintf(helpfs, "\n");
fprintf(helpfs, "The syntax of the control file is as follows:\n\n");
fprintf(helpfs, " <branch> ::= <model> \"{\" <branch_list> \"}\" | <CLASS>\n");
fprintf(helpfs, " <model> ::= <FNAME> | <partition_list>\n");
fprintf(helpfs, " <branch_list> ::= <branch> | <branch_list> <branch>\n");
fprintf(helpfs, " <partition_list> ::= <partition> | <partition_list> <partition>\n");
fprintf(helpfs, " <partition> ::= <OPTIONS> <class_list> \" %c \" <class_list> \";\"\n", PARTITION_SYMBOL);
fprintf(helpfs, " <class_list> ::= <CLASS> | <class_list> <CLASS>\n");
fprintf(helpfs, " <CLASS> ::= 0 | 1 | 2 | 3 ... | <ncls-1>\n\n");
fprintf(helpfs, "where:\n");
fprintf(helpfs, " <FNAME> base-name of a pair of files describing the class borders\n");
fprintf(helpfs, " <CLASS> class number from zero (0) to the number of classes less one\n");
fprintf(helpfs, " <partition_list> describes a non-hierarchical multi-borders model\n");
fprintf(helpfs, "\n");
printf("\n");
return INSUFFICIENT_COMMAND_ARGS;
}
//read in class borders:
fs=fopen(argv[0], "r");
if (fs==NULL) {
fprintf(stderr, "classify_m: unable to open control file, %s\n", argv[0]);
exit(UNABLE_TO_OPEN_FILE_FOR_READING);
}
lineno=0;
classifier=new multiclass_hier<real_a, cls_ta, agf2class<real_a, cls_ta> >(fs, lineno);
fclose(fs);
testfile=argv[1];
outfile=new char[strlen(argv[2])+5];
strcpy(outfile, argv[2]);
strcat(outfile, ".cls");
confile=new char[strlen(argv[2])+5];
strcpy(confile, argv[2]);
strcat(confile, ".con");
test=read_vecfile(testfile, ntest, nvar);
if (nvar == -1 || ntest==-1) {
fprintf(stderr, "Error reading input file: %s\n", testfile);
exit(FILE_READ_ERROR);
}
if (test == NULL) {
fprintf(stderr, "Unable to open file for reading: %s\n", testfile);
exit(UNABLE_TO_OPEN_FILE_FOR_WRITING);
}
fprintf(logfs, "%d test vectors found in file %s\n", ntest, testfile);
//normalization:
if ((opt_args.uflag || opt_args.normflag) && opt_args.normfile==NULL) {
fprintf(stderr, "classify_m: please specify normalization file with -a\n");
exit(FATAL_COMMAND_OPTION_PARSE_ERROR);
//opt_args.normfile=new char [strlen(argv[0])+5];
//sprintf(opt_args.normfile, "%s.std", argv[0]);
}
if (opt_args.normfile!=NULL) {
mat=read_stats2(opt_args.normfile, ave, nvar1, nvar2);
errcode=classifier->ltran(mat, ave, nvar1, nvar2, opt_args.uflag);
if (errcode!=0) exit(errcode);
if (classifier->n_feat() != nvar) {
fprintf(stderr, "classify_m: Dimensions of classifier (%d) do not match those of test data (%d).\n",
classifier->n_feat(), nvar);
exit(DIMENSION_MISMATCH);
}
} else {
if (classifier->n_feat() != nvar) {
fprintf(stderr, "classify_m: Dimension of classifier (%d) do not match dimension of test data (%d).\n",
classifier->n_feat(), nvar);
exit(DIMENSION_MISMATCH);
}
}
//begin the classification scheme:
result=new cls_ta[ntest];
con=new real_a[ntest];
ncls=classifier->n_class();
if (classifier->max_depth() == 1 && ncls>2) {
pdf=new real_a[ncls];
for (nel_ta i=0; i<ntest; i++) {
result[i]=classifier->classify(test[i], pdf);
con[i]=(ncls*pdf[result[i]]-1)/(ncls-1);
//print results to standard out:
for (cls_ta j=0; j<ncls; j++) printf(" %9.6f", pdf[j]);
printf("% 4d", result[i]);
printf("\n");
}
delete [] pdf;
} else {
for (nel_ta i=0; i<ntest; i++) {
result[i]=classifier->classify(test[i], con[i]);
con[i]=(ncls*con[i]-1)/(ncls-1);
}
}
//printf("\n");
//write the results to a file:
fs=fopen(outfile, "w");
if (fs == NULL) {
fprintf(stderr, "Unable to open file, %s, for writing\n", outfile);
return UNABLE_TO_OPEN_FILE_FOR_WRITING;
}
fwrite(result, sizeof(cls_ta), ntest, fs);
fclose(fs);
//write the results to a file:
fs=fopen(confile, "w");
if (fs == NULL) {
fprintf(stderr, "Unable to open file, %s, for writing\n", confile);
return UNABLE_TO_OPEN_FILE_FOR_WRITING;
}
fwrite(con, sizeof(real_a), ntest, fs);
fclose(fs);
//clean up:
delete [] result;
delete [] con;
delete [] test[0];
delete [] test;
delete [] outfile;
delete [] confile;
if (opt_args.normfile!=NULL) {
delete [] opt_args.normfile;
delete_matrix(mat);
delete [] ave;
}
delete classifier;
return 0;
}