[0237bc]: libregex / op_regex.cpp Maximize Restore History

Download this file

op_regex.cpp    312 lines (254 with data), 7.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/**
* @file op_regex.cpp
* This file contains implementation for a lightweight wrapper around
* libc regex, providing regular expression match and replace facility.
*
* @remark Copyright 2003 OProfile authors
* @remark Read the file COPYING
* @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
*
* @author Philippe Elie
*/
#include <cerrno>
#include <iostream>
#include <fstream>
#include "string_manip.h"
#include "op_regex.h"
using namespace std;
namespace {
/**
 * Build a bad_regex exception carrying the given message.
 *
 * @param pattern  text forwarded unchanged to the op_exception base class
 *
 * NOTE(review): this definition sits inside the anonymous namespace opened
 * just above; bad_regex is presumably declared at global scope in
 * op_regex.h -- confirm whether the definition should live outside it.
 */
bad_regex::bad_regex(string const & pattern)
	:
	op_exception(pattern)
{
	// nothing else to initialise
}
/**
 * Translate a regcomp()/regexec() error code into a human readable message.
 *
 * @param err     error code returned by regcomp() or regexec()
 * @param regexp  the regex_t object the error code relates to
 * @return the message produced by regerror(), without trailing NUL
 *
 * The message is written into a std::string owned buffer so that nothing
 * is leaked (the previous implementation never released its new[] buffer).
 */
string op_regerror(int err, regex_t const & regexp)
{
	// First call only asks how many bytes (including the NUL) are needed.
	size_t const needed_size = regerror(err, &regexp, 0, 0);
	if (needed_size == 0)
		return string();

	string buffer(needed_size, '\0');
	regerror(err, &regexp, &buffer[0], needed_size);

	// Cut at the terminating NUL written by regerror().
	return string(buffer.c_str());
}
/**
 * Compile an extended regular expression.
 *
 * @param regexp   receives the compiled expression
 * @param pattern  source text of the expression (REG_EXTENDED syntax)
 * @throws bad_regex when regcomp() reports an error; the message holds
 *         the regerror() text and the offending pattern
 */
void op_regcomp(regex_t & regexp, string const & pattern)
{
	int const status = regcomp(&regexp, pattern.c_str(), REG_EXTENDED);
	if (status == 0)
		return;

	string message("regcomp error: ");
	message += op_regerror(status, regexp);
	message += " for pattern : ";
	message += pattern;
	throw bad_regex(message);
}
/**
 * Run a compiled regular expression against a C string.
 *
 * @param regex   compiled expression
 * @param str     NUL terminated subject string
 * @param match   array receiving the sub-expression offsets
 * @param nmatch  number of entries available in match
 * @return true only when regexec() reports a successful match
 *
 * Only a return value of 0 means the match array has been filled in; any
 * other status (REG_NOMATCH, or an implementation error such as
 * REG_ESPACE) is reported as "no match" so callers never read an
 * indeterminate match array.
 */
bool op_regexec(regex_t const & regex, char const * str, regmatch_t * match,
		size_t nmatch)
{
	return regexec(&regex, str, nmatch, match, 0) == 0;
}
/**
 * Release the resources held by a compiled regular expression.
 *
 * @param regexp  expression previously compiled with regcomp()
 */
void op_regfree(regex_t & regexp)
{
	regex_t * const compiled = &regexp;
	regfree(compiled);
}
// Return the sub-expression index designated by the character x of a
// "\x" sequence: '0'..'9' map to 0..9 and 'a'..'z' map to 10..35.
// Any other character yields size_t(-1).
//
// Plain range comparisons are used instead of isdigit(): passing a
// negative char value to isdigit() is undefined behaviour.
size_t subexpr_index(char ch)
{
	if (ch >= '0' && ch <= '9')
		return size_t(ch - '0');

	if (ch >= 'a' && ch <= 'z')
		return size_t(ch - 'a') + 10;

	return size_t(-1);
}
} // anonymous namespace
/**
 * @param limit_      stored in limit; bounds the iteration counts used by
 *                    execute() and do_execute()
 * @param limit_defs  stored in limit_defs_expansion; bounds the number of
 *                    definition expansion passes done by expand_string()
 */
regular_expression_replace::regular_expression_replace(size_t limit_,
						       size_t limit_defs)
	: limit(limit_), limit_defs_expansion(limit_defs)
{
	// members are fully set up by the initialiser list
}
/// Free every compiled expression stored in v_regexp.
regular_expression_replace::~regular_expression_replace()
{
	size_t const nr_regexp = v_regexp.size();
	for (size_t idx = 0; idx != nr_regexp; ++idx) {
		op_regfree(v_regexp[idx]);
	}
}
/**
 * Register a named definition usable as ${name} in later definitions
 * and patterns.
 *
 * @param name        key under which the definition is stored in defs
 * @param definition  value; any ${...} reference it contains is expanded
 *                    through expand_string() before being stored
 */
void regular_expression_replace::add_definition(string const & name,
						string const & definition)
{
	string value;
	expand_string(definition, value);

	// Only touch defs once the expansion succeeded.
	defs[name] = value;
}
void regular_expression_replace::add_pattern(string const & pattern,
string const & replace)
{
string expanded_pattern;
expand_string(pattern, expanded_pattern);
regex_t regexp;
op_regcomp(regexp, expanded_pattern);
v_regexp.push_back(regexp);
v_replace.push_back(replace);
}
/**
 * Expand the ${name} references of input until a fixed point is reached.
 *
 * @param input   string to expand
 * @param result  receives the fully expanded string
 * @throws bad_regex when the string still changes after
 *         limit_defs_expansion passes; the message now really contains
 *         the offending input (the closing quote of the literal was
 *         misplaced, so the text "+ input" was emitted instead)
 */
void regular_expression_replace::expand_string(string const & input,
					       string & result)
{
	string last, expanded(input);
	size_t i = 0;
	for (i = 0 ; i < limit_defs_expansion ; ++i) {
		last = expanded;
		expanded = substitute_definition(last);
		// A pass that changes nothing means expansion is complete.
		if (expanded == last) {
			break;
		}
	}

	// Loop ran to its bound without reaching a fixed point.
	if (i == limit_defs_expansion) {
		throw bad_regex("too many substitution for: " + input);
	}

	result = last;
}
/**
 * Perform one expansion pass over pattern: every unescaped ${name} is
 * replaced by the value stored in defs for name.
 *
 * @param pattern  text to scan
 * @return the text with one level of references replaced
 * @throws bad_regex when an unescaped '$' is not immediately followed by
 *         '{', when the closing '}' is missing, or when name is unknown
 *
 * A '$' preceded by an unescaped backslash is copied verbatim.
 */
string regular_expression_replace::substitute_definition(string const & pattern)
{
	string output;
	bool escaped = false;

	for (size_t idx = 0; idx < pattern.length(); ++idx) {
		char const cur = pattern[idx];

		if (cur != '$' || escaped) {
			// Ordinary character: copy it and track whether it is a
			// backslash escaping the next character.
			escaped = (cur == '\\' && !escaped);
			output += cur;
			continue;
		}

		// Unescaped '$': must introduce a "${name}" reference.
		size_t const open = pattern.find('{', idx);
		if (open != idx + 1) {
			throw bad_regex("invalid $ in pattern: " + pattern);
		}

		size_t const close = pattern.find('}', idx);
		if (close == string::npos) {
			throw bad_regex("no matching '}' in pattern: " + pattern);
		}

		string def_name = pattern.substr(open + 1, (close - open) - 1);
		if (defs.find(def_name) == defs.end()) {
			throw bad_regex("definition not found and used in pattern: (" + def_name + ") " + pattern);
		}

		output += defs[def_name];
		// Resume scanning right after the closing brace.
		idx = close;
	}

	return output;
}
// FIXME: limit the output string size? A rule such as "a" = "aa" makes
// the output string grow exponentially.
/**
 * Apply every rule to str, repeating whole passes until a pass changes
 * nothing or limit passes have been made.
 *
 * @param str  string rewritten in place
 * @return true when the last pass left str unchanged; false when the
 *         passes stopped because the limit was reached. The value does
 *         NOT tell whether str was modified.
 */
bool regular_expression_replace::execute(string & str) const
{
	bool modified = true;
	size_t pass = 0;

	while (modified && pass < limit) {
		modified = false;
		for (size_t rule = 0; rule < v_regexp.size(); ++rule) {
			// Every rule is run on each pass, whatever the earlier
			// rules did.
			bool const hit = do_execute(str, v_regexp[rule], v_replace[rule]);
			modified = modified || hit;
		}
		++pass;
	}

	return !modified;
}
/**
 * Apply a single rule to str as long as it matches, at most limit times.
 *
 * @param str      string rewritten in place
 * @param regexp   compiled expression of the rule
 * @param replace  replacement text of the rule
 * @return true when at least one replacement was done
 */
bool regular_expression_replace::do_execute(string & str,
					    regex_t const & regexp,
					    string const & replace) const
{
	regmatch_t match[max_match];
	// Matching always restarts from the beginning of the string.
	size_t const offset = 0;

	bool replaced = false;
	size_t rounds = 0;
	while (op_regexec(regexp, str.c_str() + offset, match, max_match)
	       && rounds < limit) {
		replaced = true;
		do_replace(str, offset, replace, match);
		++rounds;
	}

	return replaced;
}
/**
 * Replace the text matched by match[0] with the expansion of replace.
 *
 * @param str        string rewritten in place
 * @param start_pos  offset in str of the position the match was run from;
 *                   every offset in match is relative to it
 * @param replace    replacement text. "\\" yields a backslash and "\x",
 *                   x in [0-9a-z], yields the text of sub-expression x
 * @param match      offsets filled in by regexec()
 * @throws bad_regex on a trailing backslash, on a "\x" whose x is not a
 *         group index, on a group index >= max_match, or when only one
 *         of rm_so / rm_eo is -1 for a referenced group
 *
 * The text of a referenced group is read at start_pos + rm_so: the match
 * offsets are relative to start_pos, exactly as for the replaced range.
 */
void regular_expression_replace::do_replace(string & str, size_t start_pos,
					    string const & replace,
					    regmatch_t const * match) const
{
	string inserted;
	for (size_t i = 0 ; i < replace.length() ; ++i) {
		if (replace[i] != '\\') {
			inserted += replace[i];
			continue;
		}

		if (i == replace.length() - 1) {
			throw bad_regex("illegal \\ trailer: " + replace);
		}

		// Look at the character following the backslash.
		++i;
		if (replace[i] == '\\') {
			inserted += '\\';
			continue;
		}

		size_t const sub_expr = subexpr_index(replace[i]);
		if (sub_expr == size_t(-1)) {
			throw bad_regex("expect group index :" + replace);
		}
		if (sub_expr >= max_match) {
			throw bad_regex("illegal group index :" + replace);
		}

		regmatch_t const & group = match[sub_expr];
		if (group.rm_so == -1 && group.rm_eo == -1) {
			// group did not take part in the match: nothing to insert
			continue;
		}
		if (group.rm_so == -1 || group.rm_eo == -1) {
			throw bad_regex("illegal match: " + replace);
		}

		inserted += str.substr(start_pos + group.rm_so,
				       group.rm_eo - group.rm_so);
	}

	size_t first = match[0].rm_so + start_pos;
	size_t count = match[0].rm_eo - match[0].rm_so;

	str.replace(first, count, inserted);
}
/**
 * Load definitions and rules from a text file into regex.
 *
 * @param regex     object receiving the definitions and patterns
 * @param filename  file to read
 * @throws op_runtime_error when the file can not be opened
 * @throws bad_regex when a line is neither a definition nor a rule
 *
 * Each line is trimmed; empty lines and lines starting with '#' are
 * skipped. The remaining lines are either
 *   $name = "value"          added through add_definition()
 *   "pattern" = "replace"    added through add_pattern()
 *
 * The error messages are built from a std::string: the previous
 * "literal" + '"' expression was pointer arithmetic on the literal
 * (offset by the character code of '"'), not a concatenation.
 */
void setup_regex(regular_expression_replace & regex,
		 string const & filename)
{
	ifstream in(filename.c_str());
	if (!in) {
		throw op_runtime_error("Can't open file " + filename +
				       " for reading", errno);
	}

	// Helper rules used to pick the interesting part of a line.
	regular_expression_replace var_name_rule;
	var_name_rule.add_pattern("^\\$([_a-zA-Z][_a-zA-Z0-9]*)[ ]*=.*", "\\1");
	regular_expression_replace var_value_rule;
	var_value_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");

	regular_expression_replace left_rule;
	left_rule.add_pattern("[ ]*\"(.*)\"[ ]*=.*", "\\1");
	regular_expression_replace right_rule;
	right_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");

	string line;
	while (getline(in, line)) {
		line = trim(line);
		if (line.empty() || line[0] == '#')
			continue;

		string temp = line;
		var_name_rule.execute(temp);
		if (temp == line) {
			// Not a "$name = ..." line: must be "pattern" = "replace".
			string left = line;
			left_rule.execute(left);
			if (left == line) {
				throw bad_regex("invalid input file: \"" +
						line + '"');
			}

			string right = line;
			right_rule.execute(right);
			if (right == line) {
				throw bad_regex("invalid input file: \"" +
						line + '"');
			}

			regex.add_pattern(left, right);
		} else {
			// temp != line ==> var_name_rule succeeded in
			// substituting into temp the var_name present in line
			string var_name = temp;
			string var_value = line;
			var_value_rule.execute(var_value);
			if (var_value == line) {
				throw bad_regex("invalid input file: \"" +
						line + '"');
			}

			regex.add_definition(var_name, var_value);
		}
	}
}