Menu

#49 regex like time parser

open
nobody
None
5
2012-10-08
2009-05-06
No

Currently DateTimeFormat.forPattern allows only very simple expressions, like "YYYY-MM-dd".

It is often desirable to have regex-like patterns -- alternations and optionals. For example:

YYYY-?MM-?dd

parses both 2009-04-12 and 20090412

HH:mm(:ss)?

should parse both 12:14 and 12:14:00

YYYY-MM-dd|dd.MM.YYYY

should parse both 2009-04-12 and 12.04.2009

Discussion

  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    First version of the patch: optional (?) is implemented. Variants (|) are TBD. Documentation is missing too. Please review.

    ===
    Index: src/test/org/joda/time/format/TestDateTimeFormat.java
    ===================================================================
    --- src/test/org/joda/time/format/TestDateTimeFormat.java (revision 1397)
    +++ src/test/org/joda/time/format/TestDateTimeFormat.java (working copy)
    @@ -27,6 +27,7 @@
    import org.joda.time.DateTimeFieldType;
    import org.joda.time.DateTimeUtils;
    import org.joda.time.DateTimeZone;
    +import org.joda.time.LocalTime;
    import org.joda.time.chrono.GJChronology;

    /**
    @@ -982,6 +983,29 @@
    DateTime date = dateFormatter.parseDateTime(str);
    assertEquals(new DateTime(2007, 3, 8, 22, 0, 0, 0, UTC), date);
    }

    • public void testFormat_optional_longest() {
    • DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("HH:mm(:ss)?")
    • .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    • assertEquals("14:22:03", dateFormatter.print(new LocalTime(14, 22, 3)));
    • }
    • public void testFormatParse_optionalGroup() {
    • DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("YYYY(-MM(-dd)?)?")
    • .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    • assertEquals(new DateTime(2009, 1, 1, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009"));
    • assertEquals(new DateTime(2009, 8, 1, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08"));
    • assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08-14"));
    • }
    • public void testFormatParse_optionalLiteral() {
    • // XXX: optional "-" after year does not work
    • DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("YYYY(-?)MM-?dd")
    • .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    • assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("20090814"));
    • assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-0814"));
    • assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08-14"));
    • }

      //-----------------------------------------------------------------------
      public void testFormatParse_textHalfdayAM_UK() {
      Index: src/java/org/joda/time/format/DateTimeFormat.java
      ===================================================================
      --- src/java/org/joda/time/format/DateTimeFormat.java (revision 1397)
      +++ src/java/org/joda/time/format/DateTimeFormat.java (working copy)
      @@ -374,7 +374,9 @@

      • @throws IllegalArgumentException if the pattern is invalid
        */
        static void appendPatternTo(DateTimeFormatterBuilder builder, String pattern) {
      • parsePatternTo(builder, pattern);
      • int pos = parsePatternTo(builder, pattern, 0);
      • if (pos != pattern.length())
      • throw new IllegalArgumentException("got unparsed pattern part: " + pattern.substring(pos));
        }

      //-----------------------------------------------------------------------
      @@ -395,12 +397,13 @@

      • @param pattern pattern specification
      • @throws IllegalArgumentException if the pattern is invalid
      • @see #forPattern
        • @return position where parsing stopped
          */
      • private static void parsePatternTo(DateTimeFormatterBuilder builder, String pattern) {
      • private static int parsePatternTo(DateTimeFormatterBuilder builder, String pattern, int pos) {
        int length = pattern.length();
        int[] indexRef = new int[1];
    • for (int i=0; i<length; i++) {

    • for (int i=pos; i<length; ) {
      indexRef[0] = i;
      String token = parseToken(pattern, indexRef);
      i = indexRef[0];
      @@ -425,15 +428,14 @@
      boolean lenientParse = true;

                   // Peek ahead to next token.
      
    • if (i + 1 < length) {

    • indexRef[0]++;
    • if (isNumericToken(parseToken(pattern, indexRef))) {
    • if (i < length) {
    • String nextToken = parseToken(pattern, indexRef);
    • if (isNumericToken(nextToken) || nextToken.charAt(0) == '(') {
      // If next token is a number, cannot support
      // lenient parse, because it will consume digits
      // that it should not.
      lenientParse = false;
      }
    • indexRef[0]--;
      }

                   // Use pivots which are compatible with SimpleDateFormat.
      

      @@ -453,13 +455,13 @@
      int maxDigits = 9;

                   // Peek ahead to next token.
      
    • if (i + 1 < length) {

    • indexRef[0]++;
    • if (isNumericToken(parseToken(pattern, indexRef))) {
    • // If next token is a number, cannot support long years.
    • if (i < length) {
    • String nextToken = parseToken(pattern, indexRef);
    • if (isNumericToken(nextToken) || nextToken.charAt(0) == '(') {
    • // If next token is a number or group, cannot support long years.
    • // XXX: don't know to make "yyyy-?MM" pattern work
      maxDigits = tokenLen;
      }
    • indexRef[0]--;
      }

                   switch (c) {
      

      @@ -555,11 +557,25 @@
      builder.appendLiteral(new String(sub));
      }
      break;

    • case '(':

    • DateTimeFormatterBuilder innerBuilder = new DateTimeFormatterBuilder();
    • i = parsePatternTo(innerBuilder, pattern, i);
    • if (i == pattern.length() || pattern.charAt(i) != ')')
    • throw new IllegalArgumentException("expecting ')'");
    • builder.append(innerBuilder.toFormatter());
    • i += 1;
    • break;
    • case ')':
    • return i - token.length();
    • case '?':
    • builder.makeTailParserOptional();
    • break;
      default:
      throw new IllegalArgumentException
      ("Illegal pattern component: " + token);
      }
      }
    • return pattern.length();
      }

      /**
      @@ -591,6 +607,8 @@
      break;
      }
      }

    • } else if (c == '(' || c == ')' || c == '|' || c == '?') {

    • buf.append(c);
      } else {
      // This will identify token as text.
      buf.append('\'');
      @@ -609,7 +627,7 @@
      inLiteral = !inLiteral;
      }
      } else if (!inLiteral &&
    • (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z')) {
    • (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '(' || c == ')' || c == '|' || c == '?')) {
      i--;
      break;
      } else {
      @@ -618,7 +636,7 @@
      }
      }

    • indexRef[0] = i;

    • indexRef[0] = i + 1;
      return buf.toString();
      }

    @@ -679,7 +697,7 @@
    formatter = (DateTimeFormatter) cPatternedCache.get(pattern);
    if (formatter == null) {
    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();

    • parsePatternTo(builder, pattern);
    • appendPatternTo(builder, pattern);
      formatter = builder.toFormatter();

               cPatternedCache.put(pattern, formatter);
      

      Index: src/java/org/joda/time/format/DateTimeFormatterBuilder.java

      --- src/java/org/joda/time/format/DateTimeFormatterBuilder.java (revision 1397)
      +++ src/java/org/joda/time/format/DateTimeFormatterBuilder.java (working copy)
      @@ -2656,4 +2656,21 @@
      }
      }

    • public void makeTailParserOptional() {

    • if (iElementPairs.isEmpty())
    • throw new IllegalStateException("must be non-empty to make tail optional");
    • DateTimePrinter printer = (DateTimePrinter) iElementPairs.get(iElementPairs.size() - 2);
    • DateTimeParser parser = (DateTimeParser) iElementPairs.get(iElementPairs.size() - 1);
    • iElementPairs.subList(iElementPairs.size() - 2, iElementPairs.size()).clear();
    • if (parser == null)
    • throw new IllegalStateException("no tail parser");
    • if (printer != null) {
    • append(printer, new DateTimeParser[] { parser, null });
    • } else {
    • appendOptional(parser);
    • }
    • }
      +
      }
      ===
     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v1 — no variants, only optional

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v2 — no variants, only optional, pattern yyyy-?MM now works

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v3 — all features, no doc yet

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v4 — all declared features, tests, doc

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    Attached version of the patch with all features implemented, with tests and with documentation.

     
  • Stephen Colebourne

    I see what you are trying to achieve here, and it isn't necessarily a bad idea. It is backwards incompatible however with current patterns.

    If I were to see something like this added to Joda-Time, I'd probably want to follow the model that I'd started in JSR-310.

    So, I can't include it due to compatibility, and I'm not sure whether it's too complex for Joda-Time in general.

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-24

    Maybe it is worth adding another method, forPatternExt(), that accepts an extended pattern, keeping forPattern() intact?

     

Log in to post a comment.

MongoDB Logo MongoDB