Menu

#49 regex like time parser

open
nobody
None
5
2012-10-08
2009-05-06
No

Currently DateTimeFormat.forPattern allows only very simple expressions, like "YYYY-MM-dd".

It is often desirable to have regex-like patterns -- alternations and optionals. For example:

YYYY-?MM-?dd

should parse both 2009-04-12 and 20090412

HH:mm(:ss)?

should parse both 12:14 and 12:14:00

YYYY-MM-dd|dd.MM.YYYY

should parse both 2009-04-12 and 12.04.2009

Discussion

  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    First version of the patch: optional (?) is implemented. Variants (|) are TBD. Documentation is missing too. Please review.

    ===
    Index: src/test/org/joda/time/format/TestDateTimeFormat.java
    ===================================================================
    --- src/test/org/joda/time/format/TestDateTimeFormat.java (revision 1397)
    +++ src/test/org/joda/time/format/TestDateTimeFormat.java (working copy)
    @@ -27,6 +27,7 @@
    import org.joda.time.DateTimeFieldType;
    import org.joda.time.DateTimeUtils;
    import org.joda.time.DateTimeZone;
    +import org.joda.time.LocalTime;
    import org.joda.time.chrono.GJChronology;

    /**
    @@ -982,6 +983,29 @@
    DateTime date = dateFormatter.parseDateTime(str);
    assertEquals(new DateTime(2007, 3, 8, 22, 0, 0, 0, UTC), date);
    }
    +
    + public void testFormat_optional_longest() {
    + DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("HH:mm(:ss)?")
    + .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    + assertEquals("14:22:03", dateFormatter.print(new LocalTime(14, 22, 3)));
    + }
    +
    + public void testFormatParse_optionalGroup() {
    + DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("YYYY(-MM(-dd)?)?")
    + .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    + assertEquals(new DateTime(2009, 1, 1, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009"));
    + assertEquals(new DateTime(2009, 8, 1, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08"));
    + assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08-14"));
    + }
    +
    + public void testFormatParse_optionalLiteral() {
    + // XXX: optional "-" after year does not work
    + DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("YYYY(-?)MM-?dd")
    + .withLocale(Locale.US).withZone(DateTimeZone.UTC);
    + assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("20090814"));
    + assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-0814"));
    + assertEquals(new DateTime(2009, 8, 14, 0, 0, 0, 0, UTC), dateFormatter.parseDateTime("2009-08-14"));
    + }

     //-----------------------------------------------------------------------
     public void testFormatParse_textHalfdayAM_UK() {
    

    Index: src/java/org/joda/time/format/DateTimeFormat.java

    --- src/java/org/joda/time/format/DateTimeFormat.java (revision 1397)
    +++ src/java/org/joda/time/format/DateTimeFormat.java (working copy)
    @@ -374,7 +374,9 @@
    * @throws IllegalArgumentException if the pattern is invalid
    */
    static void appendPatternTo(DateTimeFormatterBuilder builder, String pattern) {
    - parsePatternTo(builder, pattern);
    + int pos = parsePatternTo(builder, pattern, 0);
    + if (pos != pattern.length())
    + throw new IllegalArgumentException("got unparsed pattern part: " + pattern.substring(pos));
    }

     //-----------------------------------------------------------------------
    

    @@ -395,12 +397,13 @@
    * @param pattern pattern specification
    * @throws IllegalArgumentException if the pattern is invalid
    * @see #forPattern
    + * @return position where parsing stopped
    */
    - private static void parsePatternTo(DateTimeFormatterBuilder builder, String pattern) {
    + private static int parsePatternTo(DateTimeFormatterBuilder builder, String pattern, int pos) {
    int length = pattern.length();
    int[] indexRef = new int[1];

    - for (int i=0; i<length; i++) {
    + for (int i=pos; i<length; ) {
      indexRef[0] = i;
      String token = parseToken(pattern, indexRef);
      i = indexRef[0];
      @@ -425,15 +428,14 @@
      boolean lenientParse = true;

                   // Peek ahead to next token.
      
      - if (i + 1 < length) {
      - indexRef[0]++;
      - if (isNumericToken(parseToken(pattern, indexRef))) {
      + if (i < length) {
      + String nextToken = parseToken(pattern, indexRef);
      + if (isNumericToken(nextToken) || nextToken.charAt(0) == '(') {
        // If next token is a number, cannot support
        // lenient parse, because it will consume digits
        // that it should not.
        lenientParse = false;
        }
      - indexRef[0]--;
        }

                 // Use pivots which are compatible with SimpleDateFormat.
        

        @@ -453,13 +455,13 @@
        int maxDigits = 9;

                 // Peek ahead to next token.
        
        - if (i + 1 < length) {
        - indexRef[0]++;
        - if (isNumericToken(parseToken(pattern, indexRef))) {
        - // If next token is a number, cannot support long years.
        + if (i < length) {
        + String nextToken = parseToken(pattern, indexRef);
        + if (isNumericToken(nextToken) || nextToken.charAt(0) == '(') {
        + // If next token is a number or group, cannot support long years.
        + // XXX: don't know to make "yyyy-?MM" pattern work
          maxDigits = tokenLen;
          }
        - indexRef[0]--;
          }

               switch (c) {
          

          @@ -555,11 +557,25 @@
          builder.appendLiteral(new String(sub));
          }
          break;
          + case '(':
          + DateTimeFormatterBuilder innerBuilder = new DateTimeFormatterBuilder();
          + i = parsePatternTo(innerBuilder, pattern, i);
          + if (i == pattern.length() || pattern.charAt(i) != ')')
          + throw new IllegalArgumentException("expecting ')'");
          + builder.append(innerBuilder.toFormatter());
          + i += 1;
          + break;
          + case ')':
          + return i - token.length();
          + case '?':
          + builder.makeTailParserOptional();
          + break;
          default:
          throw new IllegalArgumentException
          ("Illegal pattern component: " + token);
          }
          }
          + return pattern.length();
          }

      /**
      @@ -591,6 +607,8 @@
      break;
      }
      }
      + } else if (c == '(' || c == ')' || c == '|' || c == '?') {
      + buf.append(c);
      } else {
      // This will identify token as text.
      buf.append('\'');
      @@ -609,7 +627,7 @@
      inLiteral = !inLiteral;
      }
      } else if (!inLiteral &&
      - (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z')) {
      + (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '(' || c == ')' || c == '|' || c == '?')) {
      i--;
      break;
      } else {
      @@ -618,7 +636,7 @@
      }
      }

    - indexRef[0] = i;
    + indexRef[0] = i + 1;
      return buf.toString();
      }

    @@ -679,7 +697,7 @@
    formatter = (DateTimeFormatter) cPatternedCache.get(pattern);
    if (formatter == null) {
    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
    - parsePatternTo(builder, pattern);
    + appendPatternTo(builder, pattern);
    formatter = builder.toFormatter();

                 cPatternedCache.put(pattern, formatter);
    

    Index: src/java/org/joda/time/format/DateTimeFormatterBuilder.java

    --- src/java/org/joda/time/format/DateTimeFormatterBuilder.java (revision 1397)
    +++ src/java/org/joda/time/format/DateTimeFormatterBuilder.java (working copy)
    @@ -2656,4 +2656,21 @@
    }
    }

    + public void makeTailParserOptional() {
    + if (iElementPairs.isEmpty())
    + throw new IllegalStateException("must be non-empty to make tail optional");
    + DateTimePrinter printer = (DateTimePrinter) iElementPairs.get(iElementPairs.size() - 2);
    + DateTimeParser parser = (DateTimeParser) iElementPairs.get(iElementPairs.size() - 1);
    + iElementPairs.subList(iElementPairs.size() - 2, iElementPairs.size()).clear();
    + if (parser == null)
    + throw new IllegalStateException("no tail parser");
    + if (printer != null) {
    + append(printer, new DateTimeParser[] { parser, null });
    + } else {
    + appendOptional(parser);
    + }
    + }
      +
      }
      ===
     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v1 — no variants, only optional

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v2 — no variants, only optional, pattern yyyy-?MM now works

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v3 — all features, no doc yet

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    patch v4 — all declared features, tests, doc

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-04

    Attached version of the patch with all features implemented, with tests and with documentation.

     
  • Stephen Colebourne

    I see what you are trying to achieve here, and it isn't necessarily a bad idea. It is, however, backwards incompatible with current patterns.

    If I were to see something like this added to Joda-Time, I'd probably want to follow the model that I'd started in JSR-310.

    So, I can't include it due to compatibility, and I'm not sure whether it's too complex for Joda-Time in general.

     
  • Stepan Koltsov

    Stepan Koltsov - 2009-09-24

    Maybe it is worth adding another method forPatternExt() that accepts an extended pattern, keeping forPattern() intact?

     

Log in to post a comment.