From: Shlomy R. <sre...@gm...> - 2008-10-17 19:52:28
|
Hi, Thanks for this observation. Note that HyperSearchRequest.doHyperSearch() uses nextMatch with a different segment of text each time (unless the match was found at the same offset as the previous one). If you change this parameter to nextMatch, maybe you should also run it on the same text each time. Shlomy On Fri, Oct 17, 2008 at 9:23 PM, Kazutoshi Satoda <k_s...@f2...> wrote: > First, very thanks for Matthieu and Shlomy trying hard to solve the > problems about regex zero width match. > > While testing the fixes, I encountered an infinite loop with a regex > replace '\b' -> '|', which is meant as to put '|'s for all word breaks. > ("a b c" -> "|a| |b| |c|") > > I looked into the actual changes of the recent fixes, and felt some > weirdness. Why the nextMatch() method returns the occur which already > found? It should return the *next* match. > > Reading the javadoc for nextMatch(), I found the flag firstTime, and > I was very surprised at the implementation of this method in > PatternSearchMatcher doesn't use the flag at all. And then, I remembered > my own comment for the bug #1671312 saying about the old > RESearchMatcher. > > Finally I made a patch. It does what should be done for the flag > firstTime, and also it drops isMatchingEOL() method because it looked > introduced to fix just a part of the whole problems which has been > caused by ignorance of the flag. > > I did test with the bug #1541009, #2173091, #2173087, #2173112. All > looked OK. And this patch also includes a fix for reverse regex search > (bug #2069249). > > I'll commit this patch in a few days if no one objects. Please test > and report the result. > -- > k_satoda > > Index: org/gjt/sp/jedit/search/PatternSearchMatcher.java > =================================================================== > --- org/gjt/sp/jedit/search/PatternSearchMatcher.java (revision 13901) > +++ org/gjt/sp/jedit/search/PatternSearchMatcher.java (working copy) > @@ -108,22 +108,44 @@ > if (!match.find()) > return null; > > + // if we're not at the start of the buffer, and the pattern > + // begins with "^" and matched the beginning of the region > + // being matched, ignore the match and try the next one. > + if (!start && match.start() == 0 > + && re.pattern().charAt(0) == '^' && !match.find()) > + return null; > + > + // Special care for zero width matches. Without this care, > + // the caller will fall into an infinite loop, for > non-reverse > + // search. > + if (!reverse && !firstTime && match.start() == 0 && > match.end() == 0) > + { > + if (!match.find()) > + return null; > + } > + > + Match previous = null; > while (true) > { > - // if we're not at the start of the buffer, and the > pattern > - // begins with "^" and matched the beginning of the > region > - // being matched, ignore the match and try the next > one. > - if (!start && match.start() == 0 > - && re.pattern().charAt(0) == '^' && > !match.find()) > - return null; > - > - // similarly, if we're not at the end of the buffer > and we > + // if we're not at the end of the buffer and we > // match the end of the text, and the pattern ends > with a "$", > - // return null. > - if (!end && match.end() == text.length() - 1 > + // ignore the match. > + if (!end && match.end() == text.length() > && pattern.charAt(pattern.length() - 1) == > '$') > - return null; > - > + { > + if (previous != null) > + { > + returnValue.start = previous.start; > + returnValue.end = previous.end; > + returnValue.substitutions = > previous.substitutions; > + break; > + } > + else > + { > + return null; > + } > + } > + > returnValue.substitutions = new > String[match.groupCount() + 1]; > for(int i = 0; i < returnValue.substitutions.length; > i++) > { > @@ -141,8 +163,32 @@ > // we continue until no more matches are found > if (!reverse || !match.find()) > { > + // For reverse search, check for zero width > match at > + // the end of text. > + if (reverse && !firstTime && > returnValue.start == text.length() > + && returnValue.end == text.length()) > + { > + if (previous != null) > + { > + returnValue.start = > previous.start; > + returnValue.end = > previous.end; > + returnValue.substitutions = > previous.substitutions; > + } > + else > + { > + return null; > + } > + } > break; > } > + // Save the result for reverse zero width match. > + if (previous == null) > + { > + previous = new Match(); > + } > + previous.start = returnValue.start; > + previous.end = returnValue.end; > + previous.substitutions = returnValue.substitutions; > } > > if (reverse) > @@ -158,13 +204,6 @@ > return returnValue; > } //}}} > > - //{{{ isMatchingEOL() method > - @Override > - public boolean isMatchingEOL() > - { > - return pattern.charAt(pattern.length() - 1) == '$'; > - } //}}} > - > //{{{ toString() method > @Override > public String toString() > Index: org/gjt/sp/jedit/search/SearchMatcher.java > =================================================================== > --- org/gjt/sp/jedit/search/SearchMatcher.java (revision 13901) > +++ org/gjt/sp/jedit/search/SearchMatcher.java (working copy) > @@ -53,22 +53,6 @@ > public abstract Match nextMatch(CharSequence text, boolean start, > boolean end, boolean firstTime, boolean reverse); > > - /** > - * Returns whether the matcher is matching the end of the line > - * character. This should be used to adjust the matched region > - * size when matching the end-of-line character, since it's not > - * included in the matched region returned by the > - * java.util.regex.Pattern matcher. > - * > - * @return Whether the end of the match region will be the EOL > - * character. > - * @since jEdit 4.3pre7 > - */ > - public boolean isMatchingEOL() > - { > - return false; > - } > - > protected Match returnValue; > > //{{{ Match class > Index: org/gjt/sp/jedit/search/SearchAndReplace.java > =================================================================== > --- org/gjt/sp/jedit/search/SearchAndReplace.java (revision 13901) > +++ org/gjt/sp/jedit/search/SearchAndReplace.java (working copy) > @@ -497,16 +497,7 @@ > else > start = 0; > > - boolean _search = true; > - if (!_reverse && > matcher.isMatchingEOL()) > - { > - if (start < > buffer.getLength()) > - start += 1; > - else > - _search = false; > - } > - > - if(_search && > find(view,buffer,start,repeat,_reverse)) > + > if(find(view,buffer,start,repeat,_reverse)) > return true; > } > > @@ -687,25 +678,8 @@ > Selection[] selection = textArea.getSelection(); > if (selection.length == 0) > { > - try > - { > - SearchMatcher matcher = getSearchMatcher(); > - if ((matcher != null) && > (matcher.isMatchingEOL())) > - { > - int caretPosition = > textArea.getCaretPosition(); > - selection = new Selection[] { new > Selection.Range(caretPosition,caretPosition) }; > - } > - else > - { > - view.getToolkit().beep(); > - return false; > - } > - } > - catch (Exception e) > - { > - handleError(comp,e); > - return false; > - } > + view.getToolkit().beep(); > + return false; > } > > record(view,"replace(view)",true,false); > @@ -1180,7 +1154,6 @@ > buffer.getLineOfOffset(end)) - 1 == end); > > int offset = start; > - int oldOffset = -1; > loop: for(int counter = 0; ; counter++) > { > boolean startOfLine = (buffer.getLineStartOffset( > @@ -1205,20 +1178,6 @@ > end += (length - found.length()); > occurCount++; > } > - > - if (matcher.isMatchingEOL()) > - { > - if (offset < buffer.getLength()) > - offset += 1; > - else > - break loop; > - > - if (offset >= end) > - break loop; > - } > - if (offset == oldOffset) > - break loop; > - oldOffset = offset; > } > > return occurCount; > Index: org/gjt/sp/jedit/search/HyperSearchRequest.java > =================================================================== > --- org/gjt/sp/jedit/search/HyperSearchRequest.java (revision 13901) > +++ org/gjt/sp/jedit/search/HyperSearchRequest.java (working copy) > @@ -253,7 +253,6 @@ > int offset = start; > > HyperSearchResult lastResult = null; > - int oldOffset = -1; > loop: for(int counter = 0; ; counter++) > { > boolean startOfLine = > buffer.getLineStartOffset( > @@ -286,16 +285,6 @@ > offset + match.end); > > offset += match.end; > - if (matcher.isMatchingEOL()) > - { > - if (offset < buffer.getLength()) > - offset += 1; > - else > - break loop; > - } > - if (oldOffset == offset) > - break loop; > - oldOffset = offset; > resultCount++; > } > } > > |