From: Jochen L. <lue...@us...> - 2005-03-26 07:25:29
|
Update of /cvsroot/e-p-i-c/org.epic.ext.cbg.editor/src/cbg/editor/rules
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26340/src/cbg/editor/rules

Modified Files:
    ExtendedPatternRule.java ColoringWhitespaceDetector.java TextSequenceRule.java StarRule.java
Log Message:
Performance improvements

Index: TextSequenceRule.java
===================================================================
RCS file: /cvsroot/e-p-i-c/org.epic.ext.cbg.editor/src/cbg/editor/rules/TextSequenceRule.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- TextSequenceRule.java 27 Jan 2005 20:30:45 -0000 1.7
+++ TextSequenceRule.java 26 Mar 2005 07:25:19 -0000 1.8
@@ -70,13 +70,6 @@ fColumn= column; } - /* - * @see IRule#evaluate - */ - public IToken evaluate(ICharacterScanner scanner) { - return evaluate(scanner, false); - } - /** * Returns the characters in the buffer to the scanner. * @@ -86,14 +79,23 @@ for (int i= fBuffer.length() - 1; i >= 0; i--) scanner.unread(); } - public IToken evaluate(ICharacterScanner scanner, boolean resume) { + + /* (non-Javadoc) + * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner, boolean) + */ + public IToken evaluate(ICharacterScanner scanner, boolean resume) { + // TODO Auto-generated method stub + return evaluate(scanner, false); + } + + public IToken evaluate(ICharacterScanner scanner) { if (fColumn == UNDEFINED) - return doEvaluate(scanner, resume); + return doEvaluateFinally(scanner); int c = scanner.read(); scanner.unread(); if (c == word[0]) - return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED); + return (fColumn == scanner.getColumn() ? 
doEvaluateFinally(scanner) : Token.UNDEFINED); else return Token.UNDEFINED; } @@ -107,25 +109,33 @@ return doEvaluate(scanner, false); } + protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) { + return doEvaluateFinally(scanner); + } + /** * Same code as in ExtendePatternRule */ - protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) { - myStepCounter = 0; - boolean continueCheck=true; + private final IToken doEvaluateFinally(ICharacterScanner scanner) { + //boolean continueCheck=true; - if (((ColoringPartitionScanner) scanner).getOffset() > 0) { - scanner.unread(); - curScannerChar = (char) scanner.read(); - if (isNotSequenceWhitespace && !whiteSpace.isWhitespace(curScannerChar)) { - //we do not check anything, since the leading char before this is not - //whitespace or equivalent - //BUT only if the current char is not already a Whitespace!!! - continueCheck = false; - } + if (isNotSequenceWhitespace) { + if (((ColoringPartitionScanner) scanner).getOffset() > 0) { + scanner.unread(); + curScannerChar = (char) scanner.read(); + if (!whiteSpace.isWhitespace(curScannerChar)) { + //we do not check anything, since the leading char before this is not + //whitespace or equivalent + //BUT only if the current char is not already a Whitespace!!! 
+ //for speed improvements we return immediately + // continueCheck = false; + return Token.UNDEFINED; + } + } } - if (continueCheck) { + myStepCounter = 0; + if (isExistingGroup) { if (forwardStartSequenceDetected(scanner)) { curScannerChar= (char) scanner.read(); @@ -159,9 +169,10 @@ } } } - } - unwindScanner(scanner); + if (myStepCounter != 0) { + unwindScanner(scanner); + } return Token.UNDEFINED; } @@ -250,5 +261,4 @@ scanner.unread(); } } - } Index: ColoringWhitespaceDetector.java =================================================================== RCS file: /cvsroot/e-p-i-c/org.epic.ext.cbg.editor/src/cbg/editor/rules/ColoringWhitespaceDetector.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- ColoringWhitespaceDetector.java 25 Dec 2004 08:35:03 -0000 1.6 +++ ColoringWhitespaceDetector.java 26 Mar 2005 07:25:19 -0000 1.7 @@ -1,22 +1,22 @@ -package cbg.editor.rules; - -import java.util.*; -import org.eclipse.jface.text.rules.IWhitespaceDetector; - -public class ColoringWhitespaceDetector implements IWhitespaceDetector { - - // TODO EPIC workaround - static private Map whitespaces = new HashMap(); - - // TODO Added by EPIC (workaround) - public void addWhiteSpaceChar(String whitespaceChar) { - whitespaces.put(whitespaceChar, ""); - } - - public boolean isWhitespace(char c) { - // TODO Added by EPIC (workaround) - //return Character.isWhitespace(c); - return Character.isWhitespace(c) || whitespaces.get(String.valueOf(c)) != null; - } - -} +package cbg.editor.rules; + +import java.util.*; +import org.eclipse.jface.text.rules.IWhitespaceDetector; + +public class ColoringWhitespaceDetector implements IWhitespaceDetector { + + // TODO EPIC workaround + // static private Map whitespaces = new HashMap(); static private String whitespaces=""; + + // TODO Added by EPIC (workaround) + public void addWhiteSpaceChar(String whitespaceChar) { + whitespaces += whitespaceChar; + } + + public boolean isWhitespace(char c) { + // TODO Added by EPIC 
(workaround) + //return Character.isWhitespace(c); + return Character.isWhitespace(c) || (whitespaces.indexOf(c) >= 0); + } + +} Index: StarRule.java =================================================================== RCS file: /cvsroot/e-p-i-c/org.epic.ext.cbg.editor/src/cbg/editor/rules/StarRule.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- StarRule.java 25 Dec 2004 08:35:03 -0000 1.6 +++ StarRule.java 26 Mar 2005 07:25:19 -0000 1.7 @@ -1,264 +1,264 @@ -package cbg.editor.rules; -import org.eclipse.jface.text.rules.ICharacterScanner; -import org.eclipse.jface.text.rules.IPredicateRule; -import org.eclipse.jface.text.rules.IToken; -import org.eclipse.jface.text.rules.IWhitespaceDetector; -import org.eclipse.jface.text.rules.IWordDetector; -import org.eclipse.jface.text.rules.Token; -import cbg.editor.ColoringPartitionScanner; -import cbg.editor.jedit.Mark; -public class StarRule implements IPredicateRule { - protected boolean isPrevious, excludeMatch, atLineStart; - protected char[] text; - protected static final int UNDEFINED = -1; - /** The token to be returned on success */ - protected IToken fToken; - /** The pattern's column constrain */ - protected int fColumn = UNDEFINED; - /** The pattern's escape character */ - protected char fEscapeCharacter; - /** Indicates whether end of line termines the pattern */ - protected boolean fBreaksOnEOL; - protected IWhitespaceDetector whiteDetector; - protected IWordDetector wordDetector; - private boolean atWhitepsaceEnd; - public StarRule(Mark mark, IWhitespaceDetector whitespace, IWordDetector word, IToken success) { - isPrevious = mark.isMarkPrevious(); - excludeMatch = mark.getExcludeMatch(); - atLineStart = mark.isAtLineStart(); - atWhitepsaceEnd = mark.atWhitespaceEnd(); - text = mark.getText().toCharArray(); - this.whiteDetector = whitespace; - this.wordDetector = word; - fToken = success; - } - /** - * Sets a column constraint for this rule. 
If set, the rule's token - * will only be returned if the pattern is detected starting at the - * specified column. If the column is smaller then 0, the column - * constraint is considered removed. - * - * @param column the column in which the pattern starts - */ - public void setColumnConstraint(int column) { - if (column < 0) - column = UNDEFINED; - fColumn = column; - } - /** - * Evaluates this rules without considering any column constraints. - * - * @param scanner the character scanner to be used - * @return the token resulting from this evaluation - */ - protected IToken doEvaluate(ICharacterScanner scanner) { - if (isPrevious) - return doEvaluatePrevious(scanner, false); - return doEvaluateFollowing(scanner, false); - } - /** - * Evaluates this rules without considering any column constraints. Resumes - * detection, i.e. looks only for the end sequence required by this rule if the - * <code>resume</code> flag is set. - * - * @param scanner the character scanner to be used - * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise - * @return the token resulting from this evaluation - * @since 2.0 - */ - protected IToken doEvaluatePrevious(ICharacterScanner scanner, boolean resume) { - if (resume) { - if (sequenceDetectedPrevious(scanner, false)) - return fToken; - } else { - char c = (char) scanner.read(); - /* Mark Previous :: c is either the end of the pattern, - * some other char, EOL, whitespace or EOF */ - if (c == ICharacterScanner.EOF) - return Token.UNDEFINED; - if (c == text[0]) { - if (sequenceDetectedPrevious(scanner, false)) - return fToken; - } - } - scanner.unread(); - return Token.UNDEFINED; - } - /** - * Evaluates this rules without considering any column constraints. Resumes - * detection, i.e. looks only for the end sequence required by this rule if the - * <code>resume</code> flag is set. 
- * - * @param scanner the character scanner to be used - * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise - * @return the token resulting from this evaluation - * @since 2.0 - */ - protected IToken doEvaluateFollowing(ICharacterScanner scanner, boolean resume) { - if (resume) { - if (sequenceDetectedFollowing(scanner, false)) - return fToken; - } else { - char c = (char) scanner.read(); - /* Mark Previous :: c is either the end of the pattern, - * some other char, EOL, whitespace or EOF */ - if (c == ICharacterScanner.EOF) - return Token.UNDEFINED; - if (c == text[0]) { - if (sequenceDetectedFollowing(scanner, false)) - return fToken; - } - } - scanner.unread(); - return Token.UNDEFINED; - } - /* - * @see IRule#evaluate - */ - public IToken evaluate(ICharacterScanner aScanner) { - IToken answer = evaluate(aScanner, false); - if (answer == Token.UNDEFINED) - return Token.UNDEFINED; - if (!(aScanner instanceof ColoringPartitionScanner)) - return answer; - ColoringPartitionScanner scanner = (ColoringPartitionScanner) aScanner; - if (isPrevious) { - int tokenLength = startOfToken(scanner); - scanner.moveTokenOffset(- (tokenLength - text.length)); - // scanner.markLength = excludeMatch ? tokenLength - text.length: tokenLength; - scanner.setMarkLength(tokenLength); - } - return answer; - } - private int startOfToken(ColoringPartitionScanner scanner) { - int original = scanner.getOffset(); - scanner.backup(); // this backs up to the MARK tag, for example ( - int c = scanner.backup(); - while (c != ICharacterScanner.EOF && !whiteDetector.isWhitespace((char) c) && wordDetector.isWordPart((char) c)) { - c = scanner.backup(); - } - int start = scanner.getOffset(); - // Restore the offset - scanner.setOffset(original); - return start == 0 ? original - start : original - start - 1; - } - /** - * Returns whether the next characters to be read by the character scanner - * are an exact match with the given sequence. 
No escape characters are allowed - * within the sequence. If specified the sequence is considered to be found - * when reading the EOF character. - * - * @param scanner the character scanner to be used - * @param sequence the sequence to be detected - * @param eofAllowed indicated whether EOF terminates the pattern - * @return <code>true</code> if the given sequence has been detected - */ - protected boolean sequenceDetectedFollowing(ICharacterScanner scanner, boolean eofAllowed) { - int c; - int read = 1; - for (; read < text.length; read++) { - c = scanner.read(); - if (c == ICharacterScanner.EOF && eofAllowed) { - return true; - } else if (c != text[read]) { - // Non-matching character detected, rewind the scanner back to the start. - // Do not unread the first character. - scanner.unread(); - for (int j = read - 1; j > 0; j--) - scanner.unread(); - return false; - } - } - - // Inserted by EPIC -- START - if (atWhitepsaceEnd) { - c = scanner.read(); - scanner.unread(); - //TODO EPIC specific SHOULD BE REMOVED if it works correct in ColorEditor - if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) { - scanner.unread(); // <-- EPIC: Rewind scanner if whitespace character - return true; - } - for (int j = read - 1; j > 0; j--) - scanner.unread(); - return false; - } - // Inserted by EPIC -- END - - // scan until we hit whitespace or EOF - read = 1; - c = scanner.read(); - while(c != ICharacterScanner.EOF) { - if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) { - scanner.unread(); - return true; - } - c = scanner.read(); - read++; - } - // Non-matching character detected, rewind the scanner back to the start. - // Do not unread the first character. - for (int j = read - 1; j > 0; j--) - scanner.unread(); - return false; - } - /** - * Returns whether the next characters to be read by the character scanner - * are an exact match with the given sequence. No escape characters are allowed - * within the sequence. 
If specified the sequence is considered to be found - * when reading the EOF character. - * - * @param scanner the character scanner to be used - * @param sequence the sequence to be detected - * @param eofAllowed indicated whether EOF terminates the pattern - * @return <code>true</code> if the given sequence has been detected - */ - protected boolean sequenceDetectedPrevious(ICharacterScanner scanner, boolean eofAllowed) { - int c; - int read = 1; - for (; read < text.length; read++) { - c = scanner.read(); - if (c == ICharacterScanner.EOF && eofAllowed) { - return true; - } else if (c != text[read]) { - // Non-matching character detected, rewind the scanner back to the start. - // Do not unread the first character. - scanner.unread(); - for (int j = read - 1; j > 0; j--) - scanner.unread(); - return false; - } - } - if (atWhitepsaceEnd) { - c = scanner.read(); - scanner.unread(); - if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) - return true; - for (int j = read - 1; j > 0; j--) - scanner.unread(); - return false; - } - return true; - } - /* - * @see IPredicateRule#evaluate(ICharacterScanner, boolean) - * @since 2.0 - */ - public IToken evaluate(ICharacterScanner scanner, boolean resume) { - if (fColumn == UNDEFINED) - return isPrevious ? doEvaluatePrevious(scanner, resume) : doEvaluateFollowing(scanner, resume); - int c = scanner.read(); - scanner.unread(); - if (c == text[0]) - return (fColumn == scanner.getColumn() ? (isPrevious ? 
doEvaluatePrevious(scanner, resume) : doEvaluateFollowing(scanner, resume)) : Token.UNDEFINED); - else - return Token.UNDEFINED; - } - /* - * @see IPredicateRule#getSuccessToken() - * @since 2.0 - */ - public IToken getSuccessToken() { - return fToken; - } -} +package cbg.editor.rules; +import org.eclipse.jface.text.rules.ICharacterScanner; +import org.eclipse.jface.text.rules.IPredicateRule; +import org.eclipse.jface.text.rules.IToken; +import org.eclipse.jface.text.rules.IWhitespaceDetector; +import org.eclipse.jface.text.rules.IWordDetector; +import org.eclipse.jface.text.rules.Token; +import cbg.editor.ColoringPartitionScanner; +import cbg.editor.jedit.Mark; +public class StarRule implements IPredicateRule { + protected boolean isPrevious, excludeMatch, atLineStart; + protected char[] text; + protected static final int UNDEFINED = -1; + /** The token to be returned on success */ + protected IToken fToken; + /** The pattern's column constrain */ + protected int fColumn = UNDEFINED; + /** The pattern's escape character */ + protected char fEscapeCharacter; + /** Indicates whether end of line termines the pattern */ + protected boolean fBreaksOnEOL; + protected IWhitespaceDetector whiteDetector; + protected IWordDetector wordDetector; + private boolean atWhitepsaceEnd; + public StarRule(Mark mark, IWhitespaceDetector whitespace, IWordDetector word, IToken success) { + isPrevious = mark.isMarkPrevious(); + excludeMatch = mark.getExcludeMatch(); + atLineStart = mark.isAtLineStart(); + atWhitepsaceEnd = mark.atWhitespaceEnd(); + text = mark.getText().toCharArray(); + this.whiteDetector = whitespace; + this.wordDetector = word; + fToken = success; + } + /** + * Sets a column constraint for this rule. If set, the rule's token + * will only be returned if the pattern is detected starting at the + * specified column. If the column is smaller then 0, the column + * constraint is considered removed. 
+ * + * @param column the column in which the pattern starts + */ + public void setColumnConstraint(int column) { + if (column < 0) + column = UNDEFINED; + fColumn = column; + } + /** + * Evaluates this rules without considering any column constraints. + * + * @param scanner the character scanner to be used + * @return the token resulting from this evaluation + */ + protected IToken doEvaluate(ICharacterScanner scanner) { + if (isPrevious) + return doEvaluatePrevious(scanner, false); + return doEvaluateFollowing(scanner, false); + } + /** + * Evaluates this rules without considering any column constraints. Resumes + * detection, i.e. looks only for the end sequence required by this rule if the + * <code>resume</code> flag is set. + * + * @param scanner the character scanner to be used + * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise + * @return the token resulting from this evaluation + * @since 2.0 + */ + protected final IToken doEvaluatePrevious(ICharacterScanner scanner, boolean resume) { + if (resume) { + if (sequenceDetectedPrevious(scanner, false)) + return fToken; + } else { + char c = (char) scanner.read(); + /* Mark Previous :: c is either the end of the pattern, + * some other char, EOL, whitespace or EOF */ + if (c == ICharacterScanner.EOF) + return Token.UNDEFINED; + if (c == text[0]) { + if (sequenceDetectedPrevious(scanner, false)) + return fToken; + } + } + scanner.unread(); + return Token.UNDEFINED; + } + /** + * Evaluates this rules without considering any column constraints. Resumes + * detection, i.e. looks only for the end sequence required by this rule if the + * <code>resume</code> flag is set. 
+ * + * @param scanner the character scanner to be used + * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise + * @return the token resulting from this evaluation + * @since 2.0 + */ + protected final IToken doEvaluateFollowing(ICharacterScanner scanner, boolean resume) { + if (resume) { + if (sequenceDetectedFollowing(scanner, false)) + return fToken; + } else { + char c = (char) scanner.read(); + /* Mark Previous :: c is either the end of the pattern, + * some other char, EOL, whitespace or EOF */ + if (c == ICharacterScanner.EOF) + return Token.UNDEFINED; + if (c == text[0]) { + if (sequenceDetectedFollowing(scanner, false)) + return fToken; + } + } + scanner.unread(); + return Token.UNDEFINED; + } + /* + * @see IRule#evaluate + */ + public IToken evaluate(ICharacterScanner aScanner) { + IToken answer = evaluateFinally(aScanner, false); + if (answer == Token.UNDEFINED) + return Token.UNDEFINED; + if (!(aScanner instanceof ColoringPartitionScanner)) + return answer; + ColoringPartitionScanner scanner = (ColoringPartitionScanner) aScanner; + if (isPrevious) { + int tokenLength = startOfToken(scanner); + scanner.moveTokenOffset(- (tokenLength - text.length)); + // scanner.markLength = excludeMatch ? tokenLength - text.length: tokenLength; + scanner.setMarkLength(tokenLength); + } + return answer; + } + private int startOfToken(ColoringPartitionScanner scanner) { + int original = scanner.getOffset(); + scanner.backup(); // this backs up to the MARK tag, for example ( + int c = scanner.backup(); + while (c != ICharacterScanner.EOF && !whiteDetector.isWhitespace((char) c) && wordDetector.isWordPart((char) c)) { + c = scanner.backup(); + } + int start = scanner.getOffset(); + // Restore the offset + scanner.setOffset(original); + return start == 0 ? original - start : original - start - 1; + } + /** + * Returns whether the next characters to be read by the character scanner + * are an exact match with the given sequence. 
No escape characters are allowed + * within the sequence. If specified the sequence is considered to be found + * when reading the EOF character. + * + * @param scanner the character scanner to be used + * @param sequence the sequence to be detected + * @param eofAllowed indicated whether EOF terminates the pattern + * @return <code>true</code> if the given sequence has been detected + */ + protected final boolean sequenceDetectedFollowing(ICharacterScanner scanner, boolean eofAllowed) { + int c; + int read = 1; + for (; read < text.length; read++) { + c = scanner.read(); + if (c == ICharacterScanner.EOF && eofAllowed) { + return true; + } else if (c != text[read]) { + // Non-matching character detected, rewind the scanner back to the start. + // Do not unread the first character. + scanner.unread(); + for (int j = read - 1; j > 0; j--) + scanner.unread(); + return false; + } + } + + // Inserted by EPIC -- START + if (atWhitepsaceEnd) { + c = scanner.read(); + scanner.unread(); + //TODO EPIC specific SHOULD BE REMOVED if it works correct in ColorEditor + if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) { + scanner.unread(); // <-- EPIC: Rewind scanner if whitespace character + return true; + } + for (int j = read - 1; j > 0; j--) + scanner.unread(); + return false; + } + // Inserted by EPIC -- END + + // scan until we hit whitespace or EOF + read = 1; + c = scanner.read(); + while(c != ICharacterScanner.EOF) { + if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) { + scanner.unread(); + return true; + } + c = scanner.read(); + read++; + } + // Non-matching character detected, rewind the scanner back to the start. + // Do not unread the first character. + for (int j = read - 1; j > 0; j--) + scanner.unread(); + return false; + } + /** + * Returns whether the next characters to be read by the character scanner + * are an exact match with the given sequence. No escape characters are allowed + * within the sequence. 
If specified the sequence is considered to be found + * when reading the EOF character. + * + * @param scanner the character scanner to be used + * @param sequence the sequence to be detected + * @param eofAllowed indicated whether EOF terminates the pattern + * @return <code>true</code> if the given sequence has been detected + */ + protected boolean sequenceDetectedPrevious(ICharacterScanner scanner, boolean eofAllowed) { + int c; + int read = 1; + for (; read < text.length; read++) { + c = scanner.read(); + if (c == ICharacterScanner.EOF && eofAllowed) { + return true; + } else if (c != text[read]) { + // Non-matching character detected, rewind the scanner back to the start. + // Do not unread the first character. + scanner.unread(); + for (int j = read - 1; j > 0; j--) + scanner.unread(); + return false; + } + } + if (atWhitepsaceEnd) { + c = scanner.read(); + scanner.unread(); + if (whiteDetector.isWhitespace((char) c) || c == ICharacterScanner.EOF) + return true; + for (int j = read - 1; j > 0; j--) + scanner.unread(); + return false; + } + return true; + } + /* + * @see IPredicateRule#evaluate(ICharacterScanner, boolean) + * @since 2.0 + */ public IToken evaluate(ICharacterScanner scanner, boolean resume) { return evaluateFinally( scanner, resume); } + public final IToken evaluateFinally(ICharacterScanner scanner, boolean resume) { + if (fColumn == UNDEFINED) + return isPrevious ? doEvaluatePrevious(scanner, resume) : doEvaluateFollowing(scanner, resume); + int c = scanner.read(); + scanner.unread(); + if (c == text[0]) + return (fColumn == scanner.getColumn() ? (isPrevious ? 
doEvaluatePrevious(scanner, resume) : doEvaluateFollowing(scanner, resume)) : Token.UNDEFINED); + else + return Token.UNDEFINED; + } + /* + * @see IPredicateRule#getSuccessToken() + * @since 2.0 + */ + public IToken getSuccessToken() { + return fToken; + } +} Index: ExtendedPatternRule.java =================================================================== RCS file: /cvsroot/e-p-i-c/org.epic.ext.cbg.editor/src/cbg/editor/rules/ExtendedPatternRule.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- ExtendedPatternRule.java 6 Feb 2005 08:22:47 -0000 1.5 +++ ExtendedPatternRule.java 26 Mar 2005 07:25:19 -0000 1.6 @@ -26,16 +26,20 @@ private int myStepCounter = 0; private int noMultipleEndTag; private boolean requireEndTag, requireBeforeWhitespace, requireAfterWhitespace; - private final String optinalModifiers; + private final String optinalModifiers, rejectDelimWithFirstChar; private char curScannerChar; private final char EOFChar= (char) ICharacterScanner.EOF; private int noDynamicDelimiterChars=0; - private final String countDelimterChars, requireBeforeTag, requireAfterTag, dynamicEndTerminate; + private final String requireBeforeTag, requireAfterTag, dynamicEndTerminate; private boolean continueCheck = true; + private final boolean ignoreDelimAsLetterOrDigit, rquDelimAsNonInteger, rquDelimNonEscape; + private final boolean ignoreEscape; + private final boolean acceptWhiteSpaceBefore, transformEmptyDelimiter; private IWhitespaceDetector whiteSpace; public ExtendedPatternRule(String startSequence, String endSequence, IToken token, - char escapeCharacter, boolean breaksOnEOL, int noMaxChar, String[] groupContent, + char escapeCharacter, boolean breaksOnEOL, int noMaxChar, String rejectDelimWithFirstChar, + String[] groupContent, boolean bracketMatch, int noMultipleEndTag, boolean requireEndTag, boolean CaseInSensitive, boolean isDynamicTagging, String countDelimterChars, String beforeTag, String afterTag, @@ -81,12 +85,54 @@ 
isCaseInSensitive = false; //case-sensitive is nonsense with dynamic Tags } - this.countDelimterChars = countDelimterChars; //Programmers lazyness: we check only if content will exists!!! + if (countDelimterChars.length() == 0) { + ignoreDelimAsLetterOrDigit = false; + rquDelimAsNonInteger = false; + rquDelimNonEscape = false; + transformEmptyDelimiter = false; + acceptWhiteSpaceBefore = true; + ignoreEscape = false; + } else { + //works + if (countDelimterChars.indexOf(":NONINTEGER:") >= 0) { + rquDelimAsNonInteger = true; + } else { + rquDelimAsNonInteger = false; + } + //should work + if (countDelimterChars.indexOf(":NO_LETTER_OR_DIGITS:") >= 0) { + ignoreDelimAsLetterOrDigit = true; + } else { + ignoreDelimAsLetterOrDigit = false; + } + //works + if (countDelimterChars.indexOf(":ESCAPE_AND_DELIM_AS_ONE:") >= 0) { + rquDelimNonEscape = true; + ignoreEscape = false; + } else { + rquDelimNonEscape = false; + ignoreEscape = false; + } + //works + if (countDelimterChars.indexOf(":EMPTY_AS_LINEFEED:") >= 0) { + transformEmptyDelimiter = true; + } else { + transformEmptyDelimiter = false; + } + //works + if (countDelimterChars.indexOf(":NO_WHITESPACE_BEFORE_DELIM:") >= 0) { + acceptWhiteSpaceBefore = false; + } else { + acceptWhiteSpaceBefore = true; + } + } + this.requireBeforeTag = beforeTag; this.requireAfterTag = afterTag; this.requireBeforeWhitespace = requireBeforeWhitespace ; this.requireAfterWhitespace = requireAfterWhitespace; this.optinalModifiers = optinalModifiers; + this.rejectDelimWithFirstChar = rejectDelimWithFirstChar; if (isCaseInSensitive) { //rewrite the values for caseInSensitive!!! @@ -108,16 +154,9 @@ IToken myResultToken=Token.UNDEFINED; myStepCounter = 0; if (resume) { - if (isBracketMatch || isMultiple) { - //we have to search back to the beginning of the partion and then start the scanning!!! 
- unwindToStartToken(scanner); - } else { - if (endSequenceDetected(scanner)) { - return fToken; - } else { - continueCheck = false; - } - } + //we have to search back to the beginning of the partion and then start the scanning!!! + //in case the end of the partion has been shifted to a point before the current position. + unwindToStartToken(scanner); } else { if (isDynamicTagging || requireBeforeWhitespace) { if (((ColoringPartitionScanner) scanner).getOffset() > 0) { @@ -127,7 +166,9 @@ //we do not check anything, since the leading char before this is not //whitespace or equivalent, so we could assume a single keyword for //dynamic tagging!!! - continueCheck = false; + //for speed improvements we return immediately + // continueCheck = false; + return myResultToken; } } } @@ -135,19 +176,8 @@ if (continueCheck) { if (isExistingGroup) { if (forwardStartSequenceDetected(scanner)) { - if (endCheck(scanner, resume)) { - if (optinalModifiers.length() > 0) { - /* - * We have already found the char, we only search forward for optional - * modifiers - */ - curScannerChar = (char) scanner.read(); - while (curScannerChar != EOFChar - && optinalModifiers.indexOf( curScannerChar ) >=0) { - curScannerChar = (char) scanner.read(); - } - scanner.unread() ; - } + if (endCheck(scanner, resume) && myStepCounter > 0) { + includeOptionalModifiers(scanner); return fToken; } } @@ -160,7 +190,8 @@ } if (curScannerChar == fStartSequence[0]) { if (sequenceDetected(scanner, fStartSequence, fBreaksOnEOF)) { - if (endCheck(scanner, resume)) { + if (endCheck(scanner, resume) && myStepCounter > 0) { + includeOptionalModifiers(scanner); return fToken; } } @@ -172,6 +203,27 @@ } /** + * Mark as well optional Modifiers after a tag + * Assumption: The tag was found, otherwise this method makes no sense at all + * + * @param scanner + */ + private void includeOptionalModifiers(ICharacterScanner scanner) { + if (optinalModifiers.length() > 0) { + /* + * We have already found the char, we only 
search forward for optional + * modifiers + */ + curScannerChar = (char) scanner.read(); + while (curScannerChar != EOFChar + && optinalModifiers.indexOf( curScannerChar ) >=0) { + curScannerChar = (char) scanner.read(); + } + scanner.unread() ; + } + } + + /** * This method is mainly for simple handling of the doEvaluate-issue */ private final boolean endCheck(ICharacterScanner scanner, boolean resume) { @@ -217,9 +269,15 @@ curScannerChar = (char) scanner.read(); myStepCounter++; int thisCounter = noDynamicDelimiterChars; + //skip over the Whitespaces in front of the Delim while (Character.isWhitespace(curScannerChar) && curScannerChar != EOFChar) { - curScannerChar = (char) scanner.read(); - myStepCounter++; + if (acceptWhiteSpaceBefore) { + curScannerChar = (char) scanner.read(); + myStepCounter++; + } else { + fEndSequence="".toCharArray(); + return false; + } } if (requireBeforeTag.length() > 0) { if (requireBeforeTag.charAt(0) !=curScannerChar) { @@ -230,14 +288,23 @@ myStepCounter++; } } + + //if we have existing Delims with starting chars to reject, lets test + if (rejectDelimWithFirstChar.length() > 0) { + if (rejectDelimWithFirstChar.indexOf(curScannerChar) >=0 ) { + fEndSequence = "".toCharArray(); + return false; + } + } + boolean previousCharWasEscape=false; while (--thisCounter >= 0 && !Character.isWhitespace(curScannerChar) && - !(countDelimterChars.length() == 0 + !(ignoreDelimAsLetterOrDigit && Character.isLetterOrDigit(curScannerChar)) && curScannerChar != EOFChar ) { - if (curScannerChar == fEscapeCharacter) { + if (curScannerChar == fEscapeCharacter && rquDelimNonEscape) { previousCharWasEscape = true; } else if (dynamicEndTerminate.indexOf(curScannerChar) < 0) { if (previousCharWasEscape) { @@ -266,13 +333,25 @@ if (previousCharWasEscape) { tmpEnd.append(fEscapeCharacter); } - + scanner.unread(); myStepCounter--; - if (tmpEnd.length() == 0 && countDelimterChars.length() > 0 ) { + if (tmpEnd.length() == 0 && transformEmptyDelimiter ) { 
//Transform the empty string only if countDelimterChars tmpEnd.append(((ColoringPartitionScanner) scanner).getCurrentLineDelimiter()); } + + //should we reject any kind of Integer-Delimiters? + if (rquDelimAsNonInteger) { + try { + Integer.parseInt(tmpEnd + ""); + fEndSequence="".toCharArray(); + return false; + } + catch (NumberFormatException e) { + // TODO: handle exception + } + } if (requireAfterTag.length() > 0) { /* TODO @@ -415,7 +494,7 @@ char[][] delimiters= scanner.getLegalLineDelimiters(); while ((curScannerChar=(char) scanner.read()) != EOFChar) { myStepCounter++; - if (curScannerChar == fEscapeCharacter) { + if (curScannerChar == fEscapeCharacter && ignoreEscape) { // Skip the escaped character. curScannerChar=(char) scanner.read(); if (curScannerChar == EOFChar) { @@ -457,7 +536,7 @@ char[] lineDelimiter=((ColoringPartitionScanner) scanner).getCurrentLineDelimiter().toCharArray(); boolean previousWasEscapeCharacter = false; while (curScannerChar != EOFChar) { - if (curScannerChar == fEscapeCharacter) { + if (curScannerChar == fEscapeCharacter && ignoreEscape) { // Skip the escaped character. curScannerChar = (char) scanner.read(); if (curScannerChar == EOFChar) { |