From: <cr...@us...> - 2009-02-07 06:23:19
|
Revision: 5001 http://jnode.svn.sourceforge.net/jnode/?rev=5001&view=rev Author: crawley Date: 2009-02-07 06:23:16 +0000 (Sat, 07 Feb 2009) Log Message: ----------- Implemented ${...<op>...} where <op> is '#', '##', '%' or '%%'. Modified Paths: -------------- trunk/shell/src/shell/org/jnode/shell/PathnamePattern.java trunk/shell/src/shell/org/jnode/shell/bjorne/BjorneContext.java trunk/shell/src/test/org/jnode/test/shell/bjorne/bjorne-shell-tests.xml Modified: trunk/shell/src/shell/org/jnode/shell/PathnamePattern.java =================================================================== --- trunk/shell/src/shell/org/jnode/shell/PathnamePattern.java 2009-02-07 06:21:18 UTC (rev 5000) +++ trunk/shell/src/shell/org/jnode/shell/PathnamePattern.java 2009-02-07 06:23:16 UTC (rev 5001) @@ -43,28 +43,36 @@ * character class "[abz]" matches one of "a", "b" or "z". Ranges are allowed, * so that "[0-9A-F]" matches a hexadecimal digit. If the first character of a * character class is "!" or "^", the character class is negated; i.e. - * "[^a-zA-Z]" matches any chatacter that is not an ASCII letter. + * "[^a-zA-Z]" matches any character that is not an ASCII letter. * <li>A single quote ("'") causes characters up to the next "'" to be treated * as literal characters. * <li>A backslash ("\") causes the next character (even a single quote) to be * treated as a literal character; i.e. any special meaning. * </ul> - * + * <p> * Patterns are first split into file components on "/" boundaries, then the * sub-patterns are used to match names in a given directory. Neither quoting or * escaping affect "/" interpretation, and a "/" in a character class causes it * to be treated as literal characters. - * + * <p> * The pattern expander treats "dot" files (i.e. files starting with ".") as * hidden. A hidden file is only matched when the pattern has an explicit "." as * the first character of a component. Thus the pattern "*" does not match "." * or "..", but the pattern ".*" does. - * + * <p> + * This class also exposes a static method for compiling patterns in the UNIX + * shell-style syntax to Java {@link Pattern} objects. The resulting + * objects allow you to use the shell-style syntax for matching arbitrary + * strings. The pathname-specific matching behaviors of PathnamePattern + * such as implicit anchoring, and the handling of '/' in character classes + * are supported via flags. + * <p> * TODO: * <ul> * <li>Provide a method that returns a "lazy" pathname iterator for cases where * we don't want to build a (potentially huge) in-memory list of pathnames. - * <li>Support expansions of ~ and {..,..} patterns. + * <li>Support expansions of ~ and {..,..} patterns. (Note that the latter are + * not part of the POSIX specification.) * <li>Add a parameter (or parameters) to allow the caller to limit the size of * the result list. * </ul> @@ -100,7 +108,7 @@ * character. For example, the sequence "\*" in a pattern will match a "*" * character in a filename. */ - public static final int SLASH_ESCAPES = 0x08; + public static final int BACKSLASH_ESCAPES = 0x08; /** * When set, this flag causes characters inside matching single-quote @@ -115,9 +123,36 @@ * recognized. */ public static final int CHARACTER_CLASSES = 0x20; + + /** + * When set, the pattern is anchored to the left of the string to be searched. + * This is set implicitly by the pathname matching methods. + */ + public static final int ANCHOR_LEFT = 0x40; + + /** + * When set, the pattern is anchored to the right of the string to be searched. + * This is set implicitly by the pathname matching methods. + */ + public static final int ANCHOR_RIGHT = 0x80; + + /** + * When set, '*' is eager, matching as many characters as possible. + * This is set implicitly by the pathname matching methods. + * matching is always eager. + */ + public static final int EAGER = 0x100; + + /** + * When set, an unescaped '/' inside a character class causes the entire class + * to be interpreted as a literal character sequence. + * This is set implicitly by the pathname matching methods. + */ + public static final int SLASH_DISABLES_CHARACTER_CLASSES = 0x200; + public static final int DEFAULT_FLAGS = SORT_MATCHES | HIDE_DOT_FILENAMES - | INCLUDE_DOT_AND_DOTDOT | SLASH_ESCAPES | SINGLE_QUOTE_ESCAPES + | INCLUDE_DOT_AND_DOTDOT | BACKSLASH_ESCAPES | SINGLE_QUOTE_ESCAPES | CHARACTER_CLASSES; private static final boolean DEBUG = false; @@ -127,7 +162,7 @@ private final boolean isAbsolute; // Use a weak reference for the pattern cache to avoid storage leakage. - private static WeakReference<HashMap<String, PathnamePattern>> compiledPatterns; + private static WeakReference<HashMap<String, PathnamePattern>> cache; private PathnamePattern(String source, Object[] pattern, boolean isAbsolute) { this.source = source; @@ -189,8 +224,7 @@ } }; // A directory's "." and ".." entries are not returned by - // File.listFiles - // so we have to match / add them explicitly. + // File.listFiles so we have to match / add them explicitly. if ((flags & INCLUDE_DOT_AND_DOTDOT) != 0) { if (filter.accept(current, ".")) { matches.add(new File(current, ".")); @@ -228,8 +262,8 @@ * @param source the pattern source * @return a compiler pattern for the source. */ - public static PathnamePattern compile(String source) { - return compile(source, DEFAULT_FLAGS); + public static PathnamePattern compilePathPattern(String source) { + return compilePathPattern(source, DEFAULT_FLAGS); } /** @@ -242,12 +276,11 @@ * @param flags pattern compilation flags * @return a compiler pattern for the source. */ - public static PathnamePattern compile(String source, int flags) { + public static PathnamePattern compilePathPattern(String source, int flags) { String key = flags + ":" + source; synchronized (PathnamePattern.class) { HashMap<String, PathnamePattern> cp; - if (compiledPatterns != null - && (cp = compiledPatterns.get()) != null) { + if (cache != null && (cp = cache.get()) != null) { PathnamePattern pat = cp.get(key); if (pat != null) { return pat; @@ -268,19 +301,22 @@ Object[] res = new Object[parts.length]; for (int i = 0; i < parts.length; i++) { String part = parts[i]; - res[i] = (isPattern(part, flags)) ? Pattern.compile(createRegex( - part, flags)) : part; - if (DEBUG) + if (isPattern(part, flags)) { + res[i] = compilePosixShellPattern(part, + flags | ANCHOR_LEFT | ANCHOR_RIGHT | EAGER | SLASH_DISABLES_CHARACTER_CLASSES); + } else { + res[i] = part; + } + if (DEBUG) { System.err.println(i + ": " + res[i]); + } } PathnamePattern pat = new PathnamePattern(source, res, isAbsolute); synchronized (PathnamePattern.class) { HashMap<String, PathnamePattern> cp = null; - if (compiledPatterns == null - || (cp = compiledPatterns.get()) == null) { + if (cache == null || (cp = cache.get()) == null) { cp = new HashMap<String, PathnamePattern>(); - compiledPatterns = new WeakReference<HashMap<String, PathnamePattern>>( - cp); + cache = new WeakReference<HashMap<String, PathnamePattern>>(cp); } cp.put(key, pat); } @@ -320,7 +356,7 @@ } break; case '\\': - if ((flags & SLASH_ESCAPES) != 0) { + if ((flags & BACKSLASH_ESCAPES) != 0) { return true; } break; @@ -336,20 +372,25 @@ } /** - * Turn a string representing a pathname component into a regex. + * Turn a string in POSIX shell pattern syntax into a regex. This method + * generates a {@link Pattern} that can be matched against a character sequence. * - * @param filePattern the pathname pattern component - * @return the corresponding regex. + * @param pattern the pattern in shell syntax. + * @return the corresponding regex as a {@link Pattern}. */ - private static String createRegex(String filePattern, int flags) { + public static Pattern compilePosixShellPattern(String pattern, int flags) { // This method needs to be really careful to avoid 'ordinary' characters // in the source pattern being accidentally mapped to Java regex // meta-characters. - int len = filePattern.length(); + int len = pattern.length(); StringBuffer sb = new StringBuffer(len); boolean quoted = false; + boolean eager = (flags & EAGER) != 0; + if ((flags & ANCHOR_LEFT) != 0) { + sb.append('^'); + } for (int i = 0; i < len; i++) { - char ch = filePattern.charAt(i); + char ch = pattern.charAt(i); switch (ch) { case '?': if (quoted) { @@ -364,23 +405,24 @@ if (quoted) { sb.append(ch); } else if (i == 0 && (flags & HIDE_DOT_FILENAMES) != 0) { - sb.append("(|[^\\.].*)"); + sb.append("(|[^\\.]").append(eager ? ".*" : ".*?").append(")"); } else { - sb.append(".*"); + sb.append(eager ? ".*" : ".*?"); } break; case '[': if ((flags & CHARACTER_CLASSES) != 0) { int j; StringBuffer sb2 = new StringBuffer(len); + boolean charClassOK = true; LOOP: for (j = i + 1; j < len; j++) { - char ch2 = filePattern.charAt(j); + char ch2 = pattern.charAt(j); switch (ch2) { case ']': break LOOP; case '\\': - sb2.append(protect(filePattern.charAt(++j))); + sb2.append(protect(pattern.charAt(++j))); break; case '!': case '^': @@ -389,12 +431,19 @@ case '-': sb2.append('-'); break; + case '/': + sb2.append(protect(ch2)); + charClassOK = ((flags & SLASH_DISABLES_CHARACTER_CLASSES) == 0); + break; default: sb2.append(protect(ch2)); } } if (j == len) { - sb.append('['); + sb.append(protect('[')); + } else if (!charClassOK) { + sb.append(protect('[')).append(sb2).append(protect(']')); + i = j; } else { sb.append("[").append(sb2).append(']'); i = j; @@ -404,8 +453,8 @@ } break; case '\\': - if ((flags & SLASH_ESCAPES) != 0) { - sb.append(protect(filePattern.charAt(++i))); + if ((flags & BACKSLASH_ESCAPES) != 0) { + sb.append(protect(pattern.charAt(++i))); } else { sb.append(protect(ch)); } @@ -414,7 +463,6 @@ if ((flags & SINGLE_QUOTE_ESCAPES) != 0) { quoted = !quoted; } else { - sb.append(protect(ch)); } break; @@ -422,7 +470,10 @@ sb.append(protect(ch)); } } - return sb.toString(); + if ((flags & ANCHOR_RIGHT) != 0) { + sb.append('$'); + } + return Pattern.compile(sb.toString()); } private static String protect(char ch) { Modified: trunk/shell/src/shell/org/jnode/shell/bjorne/BjorneContext.java =================================================================== --- trunk/shell/src/shell/org/jnode/shell/bjorne/BjorneContext.java 2009-02-07 06:21:18 UTC (rev 5000) +++ trunk/shell/src/shell/org/jnode/shell/bjorne/BjorneContext.java 2009-02-07 06:23:16 UTC (rev 5001) @@ -43,7 +43,10 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.tools.ant.types.Path; import org.jnode.shell.Command; import org.jnode.shell.CommandLine; import org.jnode.shell.CommandThread; @@ -380,7 +383,7 @@ globbedWordTokens.add(wordToken); return; } - PathnamePattern pattern = PathnamePattern.compile(word); + PathnamePattern pattern = PathnamePattern.compilePathPattern(word); LinkedList<String> paths = pattern.expand(new File(".")); // If it doesn't match anything, a pattern 'expands' to itself. if (paths.isEmpty()) { @@ -775,11 +778,55 @@ } else { return value; } + case HASH: + return patternEdit(value, word, false, false); + case DHASH: + return patternEdit(value, word, false, true); + case PERCENT: + return patternEdit(value, word, true, false); + case DPERCENT: + return patternEdit(value, word, true, true); default: throw new ShellFailureException("not implemented"); } } + private String patternEdit(String value, String pattern, boolean suffix, boolean eager) { + if (value == null || value.length() == 0) { + return ""; + } + if (pattern == null || pattern.length() == 0) { + return value; + } + // FIXME ... this does not work for a suffix == true, eager == false. We + // translate '*' to '.*?', but that won't give us the shortest suffix because + // Patterns inherently match from left to right. + int flags = (suffix ? PathnamePattern.ANCHOR_RIGHT : PathnamePattern.ANCHOR_LEFT) | + (eager ? PathnamePattern.EAGER : 0); + Pattern p = PathnamePattern.compilePosixShellPattern(pattern, + PathnamePattern.DEFAULT_FLAGS | flags); + Matcher m = p.matcher(value); + if (m.find()) { + if (suffix) { + return value.substring(0, m.start()); + } else { + return value.substring(m.end()); + } + } else { + return value; + } + } + + @SuppressWarnings("unused") + private String reverse(String str) { + StringBuilder sb = new StringBuilder(str.length()); + for (int i = str.length() - 1; i >= 0; i--) { + sb.append(str.charAt(i)); + } + return sb.toString(); + } + + private String variable(String parameter) throws ShellSyntaxException { if (parameter.length() == 1) { String tmp = specialVariable(parameter.charAt(0)); Modified: trunk/shell/src/test/org/jnode/test/shell/bjorne/bjorne-shell-tests.xml =================================================================== --- trunk/shell/src/test/org/jnode/test/shell/bjorne/bjorne-shell-tests.xml 2009-02-07 06:21:18 UTC (rev 5000) +++ trunk/shell/src/test/org/jnode/test/shell/bjorne/bjorne-shell-tests.xml 2009-02-07 06:23:16 UTC (rev 5001) @@ -517,4 +517,73 @@ </output> <rc>0</rc> </testSpec> + <testSpec> + <title>${..#..} expansions</title> + <command>test</command> + <runMode>AS_SCRIPT</runMode> + <script>#!bjorne + A=cat + B=caaat + echo A hash ca is ${A#ca} + echo B hash ca is ${B#ca} + echo A hash c\? is ${A#c?} + echo B hash c\? is ${B#c?} + echo A hash c\* is ${A#c*} + echo B hash c\* is ${B#c*} + </script> + <output>A hash ca is t +B hash ca is aat +A hash c? is t +B hash c? is aat +A hash c* is at +B hash c* is aaat +</output> + <rc>0</rc> + </testSpec> + <testSpec> + <title>${..##..} expansions</title> + <command>test</command> + <runMode>AS_SCRIPT</runMode> + <script>#!bjorne + A=cat + B=caaat + echo A hashhash ca is ${A##ca} + echo B hashhash ca is ${B##ca} + echo A hashhash c\? is ${A##c?} + echo B hashhash c\? is ${B##c?} + echo A hashhash c\* is ${A##c*} + echo B hashhash c\* is ${B##c*} + </script> + <output>A hashhash ca is t +B hashhash ca is aat +A hashhash c? is t +B hashhash c? is aat +A hashhash c* is +B hashhash c* is +</output> + <rc>0</rc> + </testSpec> + <testSpec> + <title>${..%..} expansions</title> + <command>test</command> + <runMode>AS_SCRIPT</runMode> + <script>#!bjorne + A=cat + B=caaat + echo A % at is ${A%at} + echo B % at is ${B%at} + echo A % \?t is ${A%?t} + echo B % \?t is ${B%?t} + echo A % \*t is ${A%*t} + echo B % \*t is ${B%*t} + </script> + <output>A % at is c +B % at is caa +A % ?t is c +B % ?t is caa +A % *t is ca +B % *t is caaa +</output> + <rc>0</rc> + </testSpec> </testSpecs> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |