Diff of /lexers/LexLaTeX.cxx [000000] .. [e3ad39]  Maximize  Restore

  Switch to unified view

a b/lexers/LexLaTeX.cxx
1
// Scintilla source code edit control
2
/** @file LexLaTeX.cxx
3
 ** Lexer for LaTeX2e.
4
  **/
5
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6
// The License.txt file describes the conditions under which this software may be distributed.
7
8
// Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
9
10
#include <stdlib.h>
11
#include <string.h>
12
#include <stdio.h>
13
#include <stdarg.h>
14
#include <assert.h>
15
#include <ctype.h>
16
#include <vector>
17
18
#include "ILexer.h"
19
#include "Scintilla.h"
20
#include "SciLexer.h"
21
22
#include "PropSetSimple.h"
23
#include "WordList.h"
24
#include "LexAccessor.h"
25
#include "Accessor.h"
26
#include "StyleContext.h"
27
#include "CharacterSet.h"
28
#include "LexerModule.h"
29
#include "LexerBase.h"
30
31
#ifdef SCI_NAMESPACE
32
using namespace Scintilla;
33
#endif
34
35
using namespace std;
36
37
// Folding state remembered for one line of the document.
//   openBegins - eight counters, one per structure level, of \begin commands
//                that have not yet been matched by an \end.
//   structLev  - index of the counter currently in use; it is moved by the
//                sectioning commands handled in LexerLaTeX::Fold.
struct latexFoldSave {
    // Start with every counter and the structure level at zero.
    latexFoldSave() : structLev(0) {
        const int slots = static_cast<int>(sizeof(openBegins) / sizeof(openBegins[0]));
        for (int slot = 0; slot < slots; ++slot) {
            openBegins[slot] = 0;
        }
    }
    // Member-wise copy of another saved state.
    latexFoldSave(const latexFoldSave &save) : structLev(save.structLev) {
        const int slots = static_cast<int>(sizeof(openBegins) / sizeof(openBegins[0]));
        for (int slot = 0; slot < slots; ++slot) {
            openBegins[slot] = save.openBegins[slot];
        }
    }
    int openBegins[8];
    int structLev;
};
47
48
class LexerLaTeX : public LexerBase {
49
private:
50
  vector<int> modes;
51
  void setMode(int line, int mode) {
52
      if (line >= static_cast<int>(modes.size())) modes.resize(line + 1, 0);
53
      modes[line] = mode;
54
  }
55
  int getMode(int line) {
56
      if (line >= 0 && line < static_cast<int>(modes.size())) return modes[line];
57
      return 0;
58
  }
59
  void truncModes(int numLines) {
60
      if (static_cast<int>(modes.size()) > numLines * 2 + 256)
61
          modes.resize(numLines + 128);
62
  }
63
  
64
  vector<latexFoldSave> saves;
65
  void setSave(int line, const latexFoldSave &save) {
66
      if (line >= static_cast<int>(saves.size())) saves.resize(line + 1);
67
      saves[line] = save;
68
  }
69
  void getSave(int line, latexFoldSave &save) {
70
      if (line >= 0 && line < static_cast<int>(saves.size())) save = saves[line];
71
      else {
72
          save.structLev = 0;
73
          for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
74
      }
75
  }
76
  void truncSaves(int numLines) {
77
      if (static_cast<int>(saves.size()) > numLines * 2 + 256)
78
          saves.resize(numLines + 128);
79
  }
80
public:
81
  static ILexer *LexerFactoryLaTeX() {
82
      return new LexerLaTeX();
83
  }
84
  void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
85
  void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
86
};
87
88
// True for the characters that Lex colours as SCE_L_SPECIAL when they follow a
// backslash: # $ % & _ { } and the space.
static bool latexIsSpecial(int ch) {
    switch (ch) {
    case '#':
    case '$':
    case '%':
    case '&':
    case '_':
    case '{':
    case '}':
    case ' ':
        return true;
    default:
        return false;
    }
}
92
93
// True for a space or a horizontal tab (line ends are not included).
static bool latexIsBlank(int ch) {
    if (ch == '\t') {
        return true;
    }
    return ch == ' ';
}
96
97
// True for a space, a tab, a carriage return or a line feed.
static bool latexIsBlankAndNL(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
100
101
// True when ch is an ASCII letter (A-Z or a-z).
// The ranges are tested explicitly so that the answer depends neither on the
// current C locale nor on the non-standard isascii(); any value outside
// 0..127, including negative ones from a signed char, is rejected.
static bool latexIsLetter(int ch) {
    const bool lower = ch >= 'a' && ch <= 'z';
    const bool upper = ch >= 'A' && ch <= 'Z';
    return lower || upper;
}
104
105
static bool latexIsTagValid(int &i, int l, Accessor &styler) {
106
  while (i < l) {
107
      if (styler.SafeGetCharAt(i) == '{') {
108
          while (i < l) {
109
              i++;
110
              if (styler.SafeGetCharAt(i) == '}') {
111
                  return true;
112
              }   else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
113
                   styler.SafeGetCharAt(i)!='*') {
114
                  return false;
115
              }
116
          }
117
      } else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
118
          return false;
119
      }
120
      i++;
121
  }
122
  return false;
123
}
124
125
static bool latexNextNotBlankIs(int i, Accessor &styler, char needle) {
126
  char ch;
127
  while (i < styler.Length()) {
128
    ch = styler.SafeGetCharAt(i);
129
      if (!latexIsBlankAndNL(ch) && ch != '*') {
130
      if (ch == needle)
131
        return true;
132
      else
133
        return false;
134
      }
135
      i++;
136
  }
137
  return false;
138
}
139
140
static bool latexLastWordIs(int start, Accessor &styler, const char *needle) {
141
  unsigned int i = 0;
142
  unsigned int l = static_cast<unsigned int>(strlen(needle));
143
  int ini = start-l+1;
144
  char s[32];
145
146
  while (i < l && i < 31) {
147
      s[i] = styler.SafeGetCharAt(ini + i);
148
      i++;
149
  }
150
  s[i] = '\0';
151
152
  return (strcmp(s, needle) == 0);
153
}
154
155
static bool latexLastWordIsMathEnv(int pos, Accessor &styler) {
156
  int i, j;
157
  char s[32];
158
  const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
159
      "multiline", "displaymath", "eqnarray", "equation" };
160
  if (styler.SafeGetCharAt(pos) != '}') return false;
161
  for (i = pos - 1; i >= 0; --i) {
162
      if (styler.SafeGetCharAt(i) == '{') break;
163
      if (pos - i >= 20) return false;
164
  }
165
  if (i < 0 || i == pos - 1) return false;
166
  ++i;
167
  for (j = 0; i + j < pos; ++j)
168
      s[j] = styler.SafeGetCharAt(i + j);
169
  s[j] = '\0';
170
  if (j == 0) return false;
171
  if (s[j - 1] == '*') s[--j] = '\0';
172
  for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
173
      if (strcmp(s, mathEnvs[i]) == 0) return true;
174
  return false;
175
}
176
177
static inline void latexStateReset(int &mode, int &state) {
178
  switch (mode) {
179
  case 1:     state = SCE_L_MATH; break;
180
  case 2:     state = SCE_L_MATH2; break;
181
  default:    state = SCE_L_DEFAULT; break;
182
  }
183
}
184
185
// There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
186
// But I think it's already good enough.
187
void SCI_METHOD LexerLaTeX::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
188
  // startPos is assumed to be the first character of a line
189
  Accessor styler(pAccess, &props);
190
  styler.StartAt(startPos);
191
  int mode = getMode(styler.GetLine(startPos) - 1);
192
  int state = initStyle;
193
  if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL)   // should not happen
194
      latexStateReset(mode, state);
195
  
196
  char chNext = styler.SafeGetCharAt(startPos);
197
  char chVerbatimDelim = '\0';
198
  styler.StartSegment(startPos);
199
  int lengthDoc = startPos + length;
200
201
  for (int i = startPos; i < lengthDoc; i++) {
202
      char ch = chNext;
203
      chNext = styler.SafeGetCharAt(i + 1);
204
205
      if (styler.IsLeadByte(ch)) {
206
          i++;
207
          chNext = styler.SafeGetCharAt(i + 1);
208
          continue;
209
      }
210
      
211
      if (ch == '\r' || ch == '\n')
212
          setMode(styler.GetLine(i), mode);
213
214
      switch (state) {
215
      case SCE_L_DEFAULT :
216
          switch (ch) {
217
          case '\\' :
218
              styler.ColourTo(i - 1, state);
219
              if (latexIsLetter(chNext)) {
220
                  state = SCE_L_COMMAND;
221
              } else if (latexIsSpecial(chNext)) {
222
                  styler.ColourTo(i + 1, SCE_L_SPECIAL);
223
                  i++;
224
                  chNext = styler.SafeGetCharAt(i + 1);
225
              } else if (chNext == '\r' || chNext == '\n') {
226
                  styler.ColourTo(i, SCE_L_ERROR);
227
              } else {
228
                  styler.ColourTo(i + 1, SCE_L_SHORTCMD);
229
                  if (chNext == '(') {
230
                      mode = 1;
231
                      state = SCE_L_MATH;
232
                  } else if (chNext == '[') {
233
                      mode = 2;
234
                      state = SCE_L_MATH2;
235
                  }
236
                  i++;
237
                  chNext = styler.SafeGetCharAt(i + 1);
238
              }
239
              break;
240
          case '$' :
241
              styler.ColourTo(i - 1, state);
242
              if (chNext == '$') {
243
                  styler.ColourTo(i + 1, SCE_L_SHORTCMD);
244
                  mode = 2;
245
                  state = SCE_L_MATH2;
246
                  i++;
247
                  chNext = styler.SafeGetCharAt(i + 1);
248
              } else {
249
                  styler.ColourTo(i, SCE_L_SHORTCMD);
250
                  mode = 1;
251
                  state = SCE_L_MATH;
252
              }
253
              break;
254
          case '%' :
255
              styler.ColourTo(i - 1, state);
256
              state = SCE_L_COMMENT;
257
              break;
258
          }
259
          break;  
260
      // These 3 will never be reached.
261
      case SCE_L_ERROR:
262
      case SCE_L_SPECIAL:
263
      case SCE_L_SHORTCMD:
264
          break;
265
      case SCE_L_COMMAND :
266
          if (!latexIsLetter(chNext)) {
267
              styler.ColourTo(i, state);
268
              if (latexNextNotBlankIs(i + 1, styler, '[' )) {
269
                  state = SCE_L_CMDOPT;
270
              } else if (latexLastWordIs(i, styler, "\\begin")) {
271
                  state = SCE_L_TAG;
272
              } else if (latexLastWordIs(i, styler, "\\end")) {
273
                  state = SCE_L_TAG2;
274
              } else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
275
                  chVerbatimDelim = chNext;
276
                  state = SCE_L_VERBATIM;
277
              } else {
278
                  latexStateReset(mode, state);
279
              }
280
          }
281
          break;
282
      case SCE_L_CMDOPT :
283
          if (ch == ']') {
284
              styler.ColourTo(i, state);
285
              latexStateReset(mode, state);
286
          }
287
          break;
288
      case SCE_L_TAG :
289
          if (latexIsTagValid(i, lengthDoc, styler)) {
290
              styler.ColourTo(i, state);
291
              latexStateReset(mode, state);
292
              if (latexLastWordIs(i, styler, "{verbatim}")) {
293
                  state = SCE_L_VERBATIM;
294
              } else if (latexLastWordIs(i, styler, "{comment}")) {
295
                  state = SCE_L_COMMENT2;
296
              } else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
297
                  mode = 1;
298
                  state = SCE_L_MATH;
299
              } else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
300
                  mode = 2;
301
                  state = SCE_L_MATH2;
302
              }
303
          } else {
304
              styler.ColourTo(i, SCE_L_ERROR);
305
              latexStateReset(mode, state);
306
              ch = styler.SafeGetCharAt(i);
307
              if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
308
          }
309
          chNext = styler.SafeGetCharAt(i+1);
310
          break;
311
      case SCE_L_TAG2 :
312
          if (latexIsTagValid(i, lengthDoc, styler)) {
313
              styler.ColourTo(i, state);
314
              latexStateReset(mode, state);
315
          } else {
316
              styler.ColourTo(i, SCE_L_ERROR);
317
              latexStateReset(mode, state);
318
              ch = styler.SafeGetCharAt(i);
319
              if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
320
          }
321
          chNext = styler.SafeGetCharAt(i+1);
322
          break;
323
      case SCE_L_MATH :
324
          switch (ch) {
325
          case '\\' :
326
              styler.ColourTo(i - 1, state);
327
              if (latexIsLetter(chNext)) {
328
                  int match = i + 3;
329
                  if (latexLastWordIs(match, styler, "\\end")) {
330
                      match++;
331
                      if (latexIsTagValid(match, lengthDoc, styler)) {
332
                          if (latexLastWordIs(match, styler, "{math}"))
333
                              mode = 0;
334
                      }
335
                  }
336
                  state = SCE_L_COMMAND;
337
              } else if (latexIsSpecial(chNext)) {
338
                  styler.ColourTo(i + 1, SCE_L_SPECIAL);
339
                  i++;
340
                  chNext = styler.SafeGetCharAt(i + 1);
341
              } else if (chNext == '\r' || chNext == '\n') {
342
                  styler.ColourTo(i, SCE_L_ERROR);
343
              } else {
344
                  if (chNext == ')') {
345
                      mode = 0;
346
                      state = SCE_L_DEFAULT;
347
                  }
348
                  styler.ColourTo(i + 1, SCE_L_SHORTCMD);
349
                  i++;
350
                  chNext = styler.SafeGetCharAt(i + 1);
351
              }
352
              break;
353
          case '$' :
354
              styler.ColourTo(i - 1, state);
355
              styler.ColourTo(i, SCE_L_SHORTCMD);
356
              mode = 0;
357
              state = SCE_L_DEFAULT;
358
              break;
359
          case '%' :
360
              styler.ColourTo(i - 1, state);
361
              state = SCE_L_COMMENT;
362
              break;
363
          }
364
          break;
365
      case SCE_L_MATH2 :
366
          switch (ch) {
367
          case '\\' :
368
              styler.ColourTo(i - 1, state);
369
              if (latexIsLetter(chNext)) {
370
                  int match = i + 3;
371
                  if (latexLastWordIs(match, styler, "\\end")) {
372
                      match++;
373
                      if (latexIsTagValid(match, lengthDoc, styler)) {
374
                          if (latexLastWordIsMathEnv(match, styler))
375
                              mode = 0;
376
                      }
377
                  }
378
                  state = SCE_L_COMMAND;
379
              } else if (latexIsSpecial(chNext)) {
380
                  styler.ColourTo(i + 1, SCE_L_SPECIAL);
381
                  i++;
382
                  chNext = styler.SafeGetCharAt(i + 1);
383
              } else if (chNext == '\r' || chNext == '\n') {
384
                  styler.ColourTo(i, SCE_L_ERROR);
385
              } else {
386
                  if (chNext == ']') {
387
                      mode = 0;
388
                      state = SCE_L_DEFAULT;
389
                  }
390
                  styler.ColourTo(i + 1, SCE_L_SHORTCMD);
391
                  i++;
392
                  chNext = styler.SafeGetCharAt(i + 1);
393
              }
394
              break;
395
          case '$' :
396
              styler.ColourTo(i - 1, state);
397
              if (chNext == '$') {
398
                  styler.ColourTo(i + 1, SCE_L_SHORTCMD);
399
                  i++;
400
                  chNext = styler.SafeGetCharAt(i + 1);
401
                  mode = 0;
402
                  state = SCE_L_DEFAULT;
403
              } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation} 
404
                  styler.ColourTo(i, SCE_L_SHORTCMD);
405
              }
406
              break;
407
          case '%' :
408
              styler.ColourTo(i - 1, state);
409
              state = SCE_L_COMMENT;
410
              break;
411
          }
412
          break;
413
      case SCE_L_COMMENT :
414
          if (ch == '\r' || ch == '\n') {
415
              styler.ColourTo(i - 1, state);
416
              latexStateReset(mode, state);
417
          }
418
          break;
419
      case SCE_L_COMMENT2 :
420
          if (ch == '\\') {
421
              int match = i + 3;
422
              if (latexLastWordIs(match, styler, "\\end")) {
423
                  match++;
424
                  if (latexIsTagValid(match, lengthDoc, styler)) {
425
                      if (latexLastWordIs(match, styler, "{comment}")) {
426
                          styler.ColourTo(i - 1, state);
427
                          state = SCE_L_COMMAND;
428
                      }
429
                  }
430
              }
431
          }
432
          break;
433
      case SCE_L_VERBATIM :
434
          if (ch == '\\') {
435
              int match = i + 3;
436
              if (latexLastWordIs(match, styler, "\\end")) {
437
                  match++;
438
                  if (latexIsTagValid(match, lengthDoc, styler)) {
439
                      if (latexLastWordIs(match, styler, "{verbatim}")) {
440
                          styler.ColourTo(i - 1, state);
441
                          state = SCE_L_COMMAND;
442
                      }
443
                  }
444
              }
445
          } else if (chNext == chVerbatimDelim) {
446
              styler.ColourTo(i + 1, state);
447
              latexStateReset(mode, state);
448
              chVerbatimDelim = '\0';
449
              i++;
450
              chNext = styler.SafeGetCharAt(i + 1);
451
          } else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
452
              styler.ColourTo(i, SCE_L_ERROR);
453
              latexStateReset(mode, state);
454
              chVerbatimDelim = '\0';
455
          }
456
          break;
457
      }
458
  }
459
  if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
460
  styler.ColourTo(lengthDoc - 1, state);
461
  styler.Flush();
462
}
463
464
static int latexFoldSaveToInt(const latexFoldSave &save) {
465
  int sum = 0;
466
  for (int i = 0; i <= save.structLev; ++i)
467
      sum += save.openBegins[i];
468
  return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
469
}
470
471
// Change folding state while processing a line
472
// Return the level before the first relevant command
473
// Assigns fold levels to the lines of [startPos, startPos + length).
// The state left by the line before the range is reloaded with getSave, and the
// state after each processed line is stored with setSave.
// Only backslashes styled SCE_L_COMMAND are looked at. \begin and the
// sectioning commands mark their line as a fold header; \end closes one open
// \begin. A line's level is the state in effect just before its first such
// command, or the state after the line when it has none.
void SCI_METHOD LexerLaTeX::Fold(unsigned int startPos, int length, int, IDocument *pAccess) {
    // Sectioning commands, outermost first; the index is the structure level.
    static const char *const structWords[7] = {"part", "chapter", "section", "subsection",
        "subsubsection", "paragraph", "subparagraph"};
    Accessor styler(pAccess, &props);
    const unsigned int endPos = startPos + length;
    int curLine = styler.GetLine(startPos);
    latexFoldSave save;
    getSave(curLine - 1, save);
    do {
        int lev = -1;   // level for this line; negative until it has been fixed
        bool needFold = false;
        const int limit = static_cast<int>(endPos);
        for (int pos = static_cast<int>(startPos); pos < limit; ++pos) {
            const char ch = styler.SafeGetCharAt(pos);
            if (ch == '\r' || ch == '\n') {
                break;
            }
            if (ch != '\\' || styler.StyleAt(pos) != SCE_L_COMMAND) {
                continue;
            }
            // Collect the letters that follow the backslash (at most 15).
            char word[16];
            int len = 0;
            for (; len < 15 && pos + 1 < limit; ++len, ++pos) {
                word[len] = styler.SafeGetCharAt(pos + 1);
                if (!latexIsLetter(word[len])) {
                    break;
                }
            }
            word[len] = '\0';

            if (strcmp(word, "begin") == 0) {
                if (lev < 0) {
                    lev = latexFoldSaveToInt(save);
                }
                ++save.openBegins[save.structLev];
                needFold = true;
            } else if (strcmp(word, "end") == 0) {
                // Drop back to the nearest level that still has an open \begin.
                while (save.structLev > 0 && save.openBegins[save.structLev] == 0) {
                    --save.structLev;
                }
                if (lev < 0) {
                    lev = latexFoldSaveToInt(save);
                }
                if (save.openBegins[save.structLev] > 0) {
                    --save.openBegins[save.structLev];
                }
            } else {
                int rank = 0;
                while (rank < 7 && strcmp(word, structWords[rank]) != 0) {
                    ++rank;
                }
                if (rank >= 7) {
                    continue;   // not a sectioning command
                }
                save.structLev = rank;   // level before the command
                // Fold the counts of all deeper levels into this one.
                for (int deeper = rank + 1; deeper < 8; ++deeper) {
                    save.openBegins[rank] += save.openBegins[deeper];
                    save.openBegins[deeper] = 0;
                }
                if (lev < 0) {
                    lev = latexFoldSaveToInt(save);
                }
                ++save.structLev;   // level after the command
                needFold = true;
            }
        }
        if (lev < 0) {
            lev = latexFoldSaveToInt(save);
        }
        if (needFold) {
            lev |= SC_FOLDLEVELHEADERFLAG;
        }
        styler.SetLevel(curLine, lev);
        setSave(curLine, save);
        ++curLine;
        startPos = styler.LineStart(curLine);
        // The next line starts exactly at the document end: give that final
        // line a level too and trim the fold cache.
        if (static_cast<int>(startPos) == styler.Length()) {
            lev = latexFoldSaveToInt(save);
            styler.SetLevel(curLine, lev);
            setSave(curLine, save);
            truncSaves(curLine);
        }
    } while (startPos < endPos);
    styler.Flush();
}
534
535
static const char *const emptyWordListDesc[] = {
536
  0
537
};
538
539
LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);

Get latest updates about Open Source Projects, Conferences and News.

Sign up for the SourceForge newsletter:





No, thanks