From: <hib...@li...> - 2006-06-30 05:54:40
|
Author: ste...@jb... Date: 2006-06-30 01:54:36 -0400 (Fri, 30 Jun 2006) New Revision: 10069 Added: branches/HQL_ANTLR_2/Hibernate3/g2/ branches/HQL_ANTLR_2/Hibernate3/g2/parse.g branches/HQL_ANTLR_2/Hibernate3/g2/resolve.g Log: redid parse phase and simplified portions of resolve phase Added: branches/HQL_ANTLR_2/Hibernate3/g2/parse.g =================================================================== --- branches/HQL_ANTLR_2/Hibernate3/g2/parse.g 2006-06-28 17:07:06 UTC (rev 10068) +++ branches/HQL_ANTLR_2/Hibernate3/g2/parse.g 2006-06-30 05:54:36 UTC (rev 10069) @@ -0,0 +1,937 @@ +header +{ +// $Id: $ + +package org.hibernate.hql.antlr; + +import org.hibernate.hql.ast.*; +import org.hibernate.hql.ast.util.*; + +} +/** + * An Antlr stream parser for building a syntax AST representing + * an input Hibernate Query Language (HQL) query. + * + * @author Joshua Davis + * @author Steve Ebersole + */ +class GeneratedHqlParser extends Parser; + +options +{ + exportVocab=Parse; + buildAST=true; + k=3; // For 'not like', 'not in', etc. +} + +tokens +{ + // -- HQL Keyword tokens -- + ALL="all"; + ANY="any"; + AND="and"; + AS="as"; + ASCENDING="asc"; + AVG="avg"; + BETWEEN="between"; + CLASS="class"; + COUNT="count"; + DELETE="delete"; + DESCENDING="desc"; + DOT; + DISTINCT="distinct"; + ELEMENTS="elements"; + ESCAPE="escape"; + EXISTS="exists"; + FALSE="false"; + FETCH="fetch"; + FROM="from"; + FULL="full"; + GROUP="group"; + HAVING="having"; + IN="in"; + INDICES="indices"; + INNER="inner"; + INSERT="insert"; + INTO="into"; + IS="is"; + JOIN="join"; + LEFT="left"; + LIKE="like"; + MAX="max"; + MIN="min"; + MINUS_Q="minus"; // i.e. 
select a from B minus select c from D + NEW="new"; + NOT="not"; + NULL="null"; + OR="or"; + ORDER="order"; + OUTER="outer"; + PROPERTIES="properties"; + RIGHT="right"; + SELECT="select"; + SET="set"; + SOME="some"; + SUM="sum"; + TRUE="true"; + UNION="union"; + UPDATE="update"; + VERSIONED="versioned"; + WHERE="where"; + + // -- SQL tokens -- + // These aren't part of HQL, but the SQL fragment parser uses the HQL lexer, so they need to be declared here. + CASE="case"; + END="end"; + ELSE="else"; + THEN="then"; + WHEN="when"; + ON="on"; + WITH="with"; + + // -- EJBQL tokens -- + BOTH="both"; + EMPTY="empty"; + LEADING="leading"; + MEMBER="member"; + OBJECT="object"; + OF="of"; + TRAILING="trailing"; + + // -- Synthetic token types -- + AGGREGATE; // One of the aggregate functions (e.g. min, max, avg) + ALIAS; + CONSTRUCTOR; + CASE2; + EXPR_LIST; + FILTER_ENTITY; // FROM element injected because of a filter expression (happens during compilation phase 2) + IN_LIST; + INDEX_OP; + IS_NOT_NULL; + IS_NULL; // Unary 'is null' operator. + METHOD_CALL; + NOT_BETWEEN; + NOT_IN; + NOT_LIKE; + ORDER_ELEMENT; + QUERY; + RANGE; + ROW_STAR; + SELECT_FROM; + UNARY_MINUS; + UNARY_PLUS; + VECTOR_EXPR; // ( x, y, z ) + WEIRD_IDENT; // Identifiers that were keywords when they came in. + ENTITY_NAME; + COLLECTION_ROLE; + CLASS_NAME; + + // Literal tokens. + CONSTANT; + NUM_DOUBLE; + NUM_FLOAT; + NUM_LONG; + JAVA_CONSTANT; +} + +{ + /** True if this is a filter query (allow no FROM clause). **/ + private boolean filter = false; + + /** + * Sets the filter flag. + * @param f True for a filter query, false for a normal query. + */ + public void setFilter(boolean f) { + filter = f; + } + + /** + * Returns true if this is a filter query, false if not. + * @return true if this is a filter query, false if not. + */ + public boolean isFilter() { + return filter; + } + + /** + * This method is overridden in the sub class in order to provide the + * 'keyword as identifier' hack. 
+ * @param token The token to retry as an identifier. + * @param ex The exception to throw if it cannot be retried as an identifier. + */ + public AST handleIdentifierError(Token token,RecognitionException ex) throws RecognitionException, TokenStreamException { + // Base implementation: Just re-throw the exception. + throw ex; + } + + /** + * This method looks ahead and converts . <token> into . IDENT when + * appropriate. + */ + public void handleDotIdent() throws TokenStreamException { + } + + /** + * Returns the negated equivalent of the expression. + * @param x The expression to negate. + */ + public AST negateNode(AST x) { + // Just create a 'not' parent for the default behavior. + return ASTUtil.createParent(astFactory, NOT, "not", x); + } + + /** + * Returns the 'cleaned up' version of a comparison operator sub-tree. + * @param x The comparison operator to clean up. + */ + public AST processEqualityExpression(AST x) throws RecognitionException { + return x; + } + + public void weakKeywords() throws TokenStreamException { } + + public void processMemberOf(Token n,AST p,ASTPair currentAST) { } + + public String extractEntityName(AST node) throws RecognitionException { + return node.getText(); + } + + public boolean isJavaConstant() throws RecognitionException, TokenStreamException { + return false; + } + + public boolean isJavaConstant(AST dotStructure) throws RecognitionException, TokenStreamException { + return false; + } + + public String extractJavaConstantReference(AST node) throws RecognitionException { + return node.getText(); + } + + public String extractDynamicInstantiationPojoName(AST node) { + return node.getText(); + } + + public void showAST(AST ast) { + } +} + + +// MAIN RULE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +statement + : ( selectStatement | updateStatement | deleteStatement | insertStatement ) + ; + + +// select statement ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +selectStatement + : 
queryRule { + #selectStatement = #([QUERY,"query"], #selectStatement); + } + ; + +queryRule + : selectFrom (whereClause)? (groupByClause)? (orderByClause)? + ; + +selectFrom! + : (s:selectClause)? (f:fromClause)? { + // If there was no FROM clause and this is a filter query, create a from clause. Otherwise, throw + // an exception because non-filter queries must have a FROM clause. + if (#f == null) { + if (filter) { + #f = #([FROM,"{filter-implied FROM}"]); + } + else + throw new SemanticException("FROM expected (non-filter queries must contain a FROM clause)"); + } + // Create an artificial token so the 'FROM' can be placed + // before the SELECT in the tree to make tree processing + // simpler. + #selectFrom = #([SELECT_FROM,"SELECT_FROM"],f,s); + } + ; + + +// select clause ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +selectClause + : SELECT^ // NOTE: The '^' after a token causes the corresponding AST node to be the root of the sub-tree. + { weakKeywords(); } // Weak keywords can appear immediately after a SELECT token. + (DISTINCT)? ( selectedPropertiesList | newExpression | selectObject ) + ; + + +// from clause ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +// NOTE: This *must* begin with the "FROM" token, otherwise the sub-query rule will be ambiguous +// with the expression rule. +// Also note: after a comma weak keywords are allowed and should be treated as identifiers. +fromClause + : FROM^ { weakKeywords(); } fromRange ( fromJoin | COMMA! { weakKeywords(); } fromRange )* + ; + +fromRange + : fromClassOrOuterQueryPath + | inClassDeclaration + | inCollectionDeclaration + | inCollectionElementsDeclaration + ; + +fromClassOrOuterQueryPath! + : c:path { weakKeywords(); } (a:asAlias)? (p:propertyFetch)? 
{ + String entityName = extractEntityName( #c ); + AST en = #( [ENTITY_NAME, entityName] ); + en.initialize( #c ); + #fromClassOrOuterQueryPath = #([RANGE, "RANGE"], [ENTITY_NAME, entityName], #a, #p); + } + ; + +inClassDeclaration! + : a:alias IN! CLASS! c:path { + String entityName = extractEntityName( #c ); + #inClassDeclaration = #([RANGE, "RANGE"], [ENTITY_NAME, entityName], #a); + } + ; + +inCollectionDeclaration! + : IN! OPEN! p:path CLOSE! a:alias { + #inCollectionDeclaration = #([JOIN, "join"], [INNER, "inner"], #p, #a); + } + ; + +inCollectionElementsDeclaration! + : a:alias IN! ELEMENTS! OPEN! p:path CLOSE! { + #inCollectionElementsDeclaration = #([JOIN, "join"], [INNER, "inner"], #p, #a); + } + ; + +//fromJoin +// : ( ( ( LEFT | RIGHT ) (OUTER)? ) | FULL | INNER )? JOIN^ (FETCH)? path (asAlias)? (propertyFetch)? (withClause)? +// ; +fromJoin! + : (jt:joinType)? j:JOIN (f:FETCH)? p:path (a:asAlias)? (pf:propertyFetch)? (w:withClause)? { + #fromJoin = #( #j, #jt, #f, #a, #pf, #p, #w ); + } + ; + +joinType + : ( ( LEFT | RIGHT ) (OUTER)? ) + | FULL + | INNER + ; + +withClause + : WITH^ logicalExpression + ; + + +// Alias rule - Parses the optional 'as' token and forces an AST identifier node. +asAlias + : (AS!)? alias + ; + +alias + : a:identifier { #a.setType(ALIAS); } + ; + +propertyFetch + : FETCH ALL! PROPERTIES! + ; + + +// update statement ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +updateStatement + : UPDATE^ (VERSIONED)? optionalFromTokenFromClause setClause (whereClause)? + ; + + +// delete statement ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +deleteStatement + : DELETE^ (optionalFromTokenFromClause) (whereClause)? 
+ ; + + +// insert statement ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +insertStatement + // Would be nice if we could abstract the FromClause/FromElement logic + // out such that it could be reused here; something analogous to + // a "table" rule in sql-grammars + : INSERT^ intoClause selectStatement + ; + +union + : queryRule (UNION queryRule)* + ; + + +// clauses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +optionalFromTokenFromClause! + : (FROM!)? f:path (a:asAlias)? { + #optionalFromTokenFromClause = #( [FROM, "FROM"], #f, #a ); + } + ; + +setClause + : (SET^ assignment (COMMA! assignment)*) + ; + +assignment + : stateField EQ^ newValue + ; + +// "state_field" is the term used in the EJB3 sample grammar; used here for easy reference. +// it is basically a property ref +stateField + : path + ; + +newValue + : concatenation + ; + +intoClause + : INTO^ path { weakKeywords(); } insertablePropertySpec + ; + +insertablePropertySpec + : OPEN! primaryExpression ( COMMA! primaryExpression )* CLOSE! { + // Just need *something* to distinguish this on the hql-sql.g side + #insertablePropertySpec = #([RANGE, "column-spec"], #insertablePropertySpec); + } + ; + + +//newExpression +// : (NEW! path) op:OPEN^ {#op.setType(CONSTRUCTOR);} selectedPropertiesList CLOSE! +// ; +newExpression! + : ( NEW! c:path ) OPEN! args:selectedPropertiesList CLOSE! { + String className = extractDynamicInstantiationPojoName( #c ); + AST container = #( [CLASS_NAME, className] ); + #newExpression = #( [CONSTRUCTOR, "new"], #container, #args ); + } + ; + +selectObject + : OBJECT^ OPEN! identifier CLOSE! + ; + +//## groupByClause: +//## GROUP_BY path ( COMMA path )*; + +groupByClause + : GROUP^ + "by"! expression ( COMMA! expression )* + (havingClause)? + ; + +//## orderByClause: +//## ORDER_BY selectedPropertiesList; + +orderByClause + : ORDER^ "by"! orderElement ( COMMA! orderElement )* + ; + +orderElement + : expression ( ascendingOrDescending )? 
+ ; + +ascendingOrDescending + : ( "asc" | "ascending" ) { #ascendingOrDescending.setType(ASCENDING); } + | ( "desc" | "descending") { #ascendingOrDescending.setType(DESCENDING); } + ; + +//## havingClause: +//## HAVING logicalExpression; + +havingClause + : HAVING^ logicalExpression + ; + +//## whereClause: +//## WHERE logicalExpression; + +whereClause + : WHERE^ logicalExpression + ; + +//## selectedPropertiesList: +//## ( path | aggregate ) ( COMMA path | aggregate )*; + +selectedPropertiesList + : aliasedExpression ( COMMA! aliasedExpression )* + ; + +aliasedExpression + : expression ( AS^ identifier )? + ; + +// expressions +// Note that most of these expressions follow the pattern +// thisLevelExpression : +// nextHigherPrecedenceExpression +// (OPERATOR nextHigherPrecedenceExpression)* +// which is a standard recursive definition for parsing an expression. +// +// Operator precedence in HQL +// lowest --> ( 7) OR +// ( 6) AND, NOT +// ( 5) equality: ==, <>, !=, is +// ( 4) relational: <, <=, >, >=, +// LIKE, NOT LIKE, BETWEEN, NOT BETWEEN, IN, NOT IN +// ( 3) addition and subtraction: +(binary) -(binary) +// ( 2) multiplication: * / %, concatenate: || +// highest --> ( 1) +(unary) -(unary) +// [] () (method call) . (dot -- identifier qualification) +// aggregate function +// () (explicit parenthesis) +// +// Note that the above precedence levels map to the rules below... +// Once you have a precedence chart, writing the appropriate rules as below +// is usually very straightforward + +logicalExpression + : expression + ; + +// Main expression rule +expression + : logicalOrExpression + ; + +// level 7 - OR +logicalOrExpression + : logicalAndExpression ( OR^ logicalAndExpression )* + ; + +// level 6 - AND, NOT +logicalAndExpression + : negatedExpression ( AND^ negatedExpression )* + ; + +// NOT nodes aren't generated. Instead, the operator in the sub-tree will be +// negated, if possible. Expressions without a NOT parent are passed through. 
+negatedExpression! +{ weakKeywords(); } // Weak keywords can appear in an expression, so look ahead. + : NOT^ x:negatedExpression { #negatedExpression = negateNode(#x); } + | y:equalityExpression { #negatedExpression = #y; } + ; + +//## OP: EQ | LT | GT | LE | GE | NE | SQL_NE | LIKE; + +// level 5 - EQ, NE +equalityExpression + : x:relationalExpression ( + ( EQ^ + | is:IS^ { #is.setType(EQ); } (NOT! { #is.setType(NE); } )? + | NE^ + | ne:SQL_NE^ { #ne.setType(NE); } + ) y:relationalExpression)* { + // Post process the equality expression to clean up 'is null', etc. + #equalityExpression = processEqualityExpression(#equalityExpression); + } + ; + +// level 4 - LT, GT, LE, GE, LIKE, NOT LIKE, BETWEEN, NOT BETWEEN +// NOTE: The NOT prefix for LIKE and BETWEEN will be represented in the +// token type. When traversing the AST, use the token type, and not the +// token text to interpret the semantics of these nodes. +relationalExpression + : concatenation ( + ( ( ( LT^ | GT^ | LE^ | GE^ ) additiveExpression )* ) + // Disable node production for the optional 'not'. + | (n:NOT!)? ( + // Represent the optional NOT prefix using the token type by + // testing 'n' and setting the token type accordingly. + (i:IN^ { + #i.setType( (n == null) ? IN : NOT_IN); + #i.setText( (n == null) ? "in" : "not in"); + } + inList) + | (b:BETWEEN^ { + #b.setType( (n == null) ? BETWEEN : NOT_BETWEEN); + #b.setText( (n == null) ? "between" : "not between"); + } + betweenList ) + | (l:LIKE^ { + #l.setType( (n == null) ? LIKE : NOT_LIKE); + #l.setText( (n == null) ? "like" : "not like"); + } + concatenation likeEscape) + | (MEMBER! OF! p:path! { + processMemberOf(n,#p,currentAST); + } ) ) + ) + ; + +likeEscape + : (ESCAPE^ concatenation)? + ; + +inList + : x:compoundExpr + { #inList = #([IN_LIST,"inList"], #inList); } + ; + +betweenList + : concatenation AND! 
concatenation + ; + +//level 4 - string concatenation +concatenation + : additiveExpression + ( c:CONCAT^ { #c.setType(EXPR_LIST); #c.setText("concatList"); } + additiveExpression + ( CONCAT! additiveExpression )* + { #concatenation = #([METHOD_CALL, "||"], #([IDENT, "concat"]), #c ); } )? + ; + +// level 3 - binary plus and minus +additiveExpression + : multiplyExpression ( ( PLUS^ | MINUS^ ) multiplyExpression )* + ; + +// level 2 - binary multiply and divide +multiplyExpression + : unaryExpression ( ( STAR^ | DIV^ ) unaryExpression )* + ; + +// level 1 - unary minus, unary plus, not +unaryExpression + : MINUS^ {#MINUS.setType(UNARY_MINUS);} unaryExpression + | PLUS^ {#PLUS.setType(UNARY_PLUS);} unaryExpression + | caseExpression + | quantifiedExpression + | atom + ; + +caseExpression + : CASE^ (whenClause)+ (elseClause)? END! + | CASE^ { #CASE.setType(CASE2); } unaryExpression (altWhenClause)+ (elseClause)? END! + ; + +whenClause + : (WHEN^ logicalExpression THEN! unaryExpression) + ; + +altWhenClause + : (WHEN^ unaryExpression THEN! unaryExpression) + ; + +elseClause + : (ELSE^ unaryExpression) + ; + +quantifiedExpression + : ( SOME^ | EXISTS^ | ALL^ | ANY^ ) + ( identifier | collectionExpr | (OPEN! ( subQuery ) CLOSE!) ) + ; + +// level 0 - expression atom +// ident qualifier ('.' ident ), array index ( [ expr ] ), +// method call ( '.' ident '(' exprList ') ) +atom + : primaryExpression + ( + DOT^ identifier + ( options { greedy=true; } : + ( op:OPEN^ {#op.setType(METHOD_CALL);} exprList CLOSE! ) )? + | lb:OPEN_BRACKET^ {#lb.setType(INDEX_OP);} expression CLOSE_BRACKET! + )* + ; + +primaryExpression + : identPrimary ( options {greedy=true;} : DOT^ "class" )? + | constant + | COLON^ identifier + // TODO: Add parens to the tree so the user can control the operator evaluation order. + | OPEN! (expressionOrVector | subQuery) CLOSE! + | PARAM^ (NUM_INT)? + ; + +// This parses normal expression and a list of expressions separated by commas. 
If a comma is encountered +// a parent VECTOR_EXPR node will be created for the list. +expressionOrVector! + : e:expression ( v:vectorExpr )? { + // If this is a vector expression, create a parent node for it. + if (#v != null) + #expressionOrVector = #([VECTOR_EXPR,"{vector}"], #e, #v); + else + #expressionOrVector = #e; + } + ; + +vectorExpr + : COMMA! expression (COMMA! expression)* + ; + +// identifier, followed by member refs (dot ident), or method calls. +// NOTE: handleDotIdent() is called immediately after the first IDENT is recognized because +// the method looks ahead to find keywords after DOT and turns them into identifiers. +identPrimary + : identifier { handleDotIdent(); } + ( options { greedy=true; } : DOT^ ( identifier | ELEMENTS | o:OBJECT { #o.setType(IDENT); } ) )* + ( options { greedy=true; } : ( op:OPEN^ { #op.setType(METHOD_CALL);} exprList CLOSE! ) )? { + if ( isJavaConstant( #identPrimary ) ) { + String constant = extractJavaConstantReference( #identPrimary ); + #identPrimary = #( [JAVA_CONSTANT, constant] ); + } + } + // Also allow special 'aggregate functions' such as count(), avg(), etc. + | aggregate + ; + +//## aggregate: +//## ( aggregateFunction OPEN path CLOSE ) | ( COUNT OPEN STAR CLOSE ) | ( COUNT OPEN (DISTINCT | ALL) path CLOSE ); + +//## aggregateFunction: +//## COUNT | 'sum' | 'avg' | 'max' | 'min'; + +aggregate + : ( SUM^ | AVG^ | MAX^ | MIN^ ) OPEN! additiveExpression CLOSE! { #aggregate.setType(AGGREGATE); } + // Special case for count - Its 'parameters' can be keywords. + | COUNT^ OPEN! ( STAR { #STAR.setType(ROW_STAR); } | ( ( DISTINCT | ALL )? ( path | collectionExpr ) ) ) CLOSE! + | collectionExpr + ; + +//## collection: ( OPEN query CLOSE ) | ( 'elements'|'indices' OPEN path CLOSE ); + +collectionExpr + : (ELEMENTS^ | INDICES^) OPEN! path CLOSE! + ; + +// NOTE: compoundExpr can be a 'path' where the last token in the path is '.elements' or '.indices' +compoundExpr + : collectionExpr + | path + | (OPEN! 
( (expression (COMMA! expression)*) | subQuery ) CLOSE!) + ; + +subQuery + : union + { #subQuery = #([QUERY,"query"], #subQuery); } + ; + +exprList +{ + AST trimSpec = null; +} + : (t:TRAILING {#trimSpec = #t;} | l:LEADING {#trimSpec = #l;} | b:BOTH {#trimSpec = #b;})? + { if(#trimSpec != null) #trimSpec.setType(IDENT); } + ( + expression ( (COMMA! expression)+ | FROM { #FROM.setType(IDENT); } expression | AS! identifier )? + | FROM { #FROM.setType(IDENT); } expression + )? + { #exprList = #([EXPR_LIST,"exprList"], #exprList); } + ; + +constant + : NUM_INT + | NUM_FLOAT + | NUM_LONG + | NUM_DOUBLE + | QUOTED_STRING + | NULL + | TRUE + | FALSE + | EMPTY + ; + +javaConstant! + : c:path { + String constant = extractJavaConstantReference( #c ); + #javaConstant = #( [JAVA_CONSTANT, constant] ); + } + ; + +//## quantifiedExpression: 'exists' | ( expression 'in' ) | ( expression OP 'any' | 'some' ) collection; + +//## compoundPath: path ( OPEN_BRACKET expression CLOSE_BRACKET ( '.' path )? )*; + +//## path: identifier ( '.' identifier )*; + +path + : identifier ( DOT^ { weakKeywords(); } identifier )* + ; + + +// Wraps the IDENT token from the lexer, in order to provide +// 'keyword as identifier' trickery. +identifier + : IDENT + exception + catch [RecognitionException ex] + { + identifier_AST = handleIdentifierError(LT(1),ex); + } + ; + +// **** LEXER ****************************************************************** + +/** + * Hibernate Query Language Lexer, which provides the HQL parser with tokens. + * + * @author Joshua Davis + */ +class GeneratedParseLexer extends Lexer; + +options { + exportVocab=Parse; + testLiterals = false; + k=2; // needed for newline, and to distinguish '>' from '>='. + // HHH-241 : Quoted strings don't allow unicode chars - This should fix it. 
+ charVocabulary='\u0000'..'\uFFFE'; // Allow any char but \uFFFF (16 bit -1, ANTLR's EOF character) + caseSensitive = false; + caseSensitiveLiterals = false; +} + +// -- Declarations -- +{ + // NOTE: The real implementations are in the subclass. + protected void setPossibleID(boolean possibleID) {} +} + +// -- Keywords -- + +EQ: '='; +LT: '<'; +GT: '>'; +SQL_NE: "<>"; +NE: "!=" | "^="; +LE: "<="; +GE: ">="; + +COMMA: ','; + +OPEN: '('; +CLOSE: ')'; +OPEN_BRACKET: '['; +CLOSE_BRACKET: ']'; + +CONCAT: "||"; +PLUS: '+'; +MINUS: '-'; +STAR: '*'; +DIV: '/'; +COLON: ':'; +PARAM: '?'; + +IDENT options { testLiterals=true; } + : ID_START_LETTER ( ID_LETTER )* + { + // Setting this flag allows the grammar to use keywords as identifiers, if necessary. + setPossibleID(true); + } + ; + +protected +ID_START_LETTER + : '_' + | '$' + | 'a'..'z' + | '\u0080'..'\ufffe' // HHH-558 : Allow unicode chars in identifiers + ; + +protected +ID_LETTER + : ID_START_LETTER + | '0'..'9' + ; + +QUOTED_STRING + : '\'' ( (ESCqs)=> ESCqs | ~'\'' )* '\'' + ; + +protected +ESCqs + : + '\'' '\'' + ; + +WS : ( ' ' + | '\t' + | '\r' '\n' { newline(); } + | '\n' { newline(); } + | '\r' { newline(); } + ) + {$setType(Token.SKIP);} //ignore this token + ; + +//--- From the Java example grammar --- +// a numeric literal +NUM_INT + {boolean isDecimal=false; Token t=null;} + : '.' {_ttype = DOT;} + ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})? + { + if (t != null && t.getText().toUpperCase().indexOf('F')>=0) + { + _ttype = NUM_FLOAT; + } + else + { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + | ( '0' {isDecimal = true;} // special case for just '0' + ( ('x') + ( // hex + // the 'e'|'E' and float suffix stuff look + // like hex digits, hence the (...)+ doesn't + // know when to stop: ambig. ANTLR resolves + // it correctly by matching immediately. It + // is therefore ok to hush warning. + options { warnWhenFollowAmbig=false; } + : HEX_DIGIT + )+ + | ('0'..'7')+ // octal + )? 
+ | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal + ) + ( ('l') { _ttype = NUM_LONG; } + + // only check to see if it's a float if looks like decimal so far + | {isDecimal}? + ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})? + | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})? + | f4:FLOAT_SUFFIX {t=f4;} + ) + { + if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) + { + _ttype = NUM_FLOAT; + } + else + { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + ; + +// hexadecimal digit (again, note it's protected!) +protected +HEX_DIGIT + : ('0'..'9'|'a'..'f') + ; + +// a couple protected methods to assist in matching floating point numbers +protected +EXPONENT + : ('e') ('+'|'-')? ('0'..'9')+ + ; + +protected +FLOAT_SUFFIX + : 'f'|'d' + ; + Added: branches/HQL_ANTLR_2/Hibernate3/g2/resolve.g =================================================================== --- branches/HQL_ANTLR_2/Hibernate3/g2/resolve.g 2006-06-28 17:07:06 UTC (rev 10068) +++ branches/HQL_ANTLR_2/Hibernate3/g2/resolve.g 2006-06-30 05:54:36 UTC (rev 10069) @@ -0,0 +1,382 @@ +header +{ +// $Id:$ +package org.hibernate.hql.antlr; + +import java.util.*; +} + +/** + * An Antlr tree parser for "resolving" or "normalizing" an HQL + * syntax AST. This parser provides the vast majority of the + * semantic analysis of the HQL AST. + * <p/> + * Both "resolving" and "normalizing" here seek a single goal of + * building a dis-ambiguated, generic query AST. + * <p/> + * The act of resolving is essentially the process of simplifying + * complex node structures into atomic components based on contextual + * information (aka, the current parser state). 
The main thrust + * of this process is breaking down dot-structures (a series of + * DOT IDENT pairs) into <ul> + * <li>a series of "implicit" join structures injected into the from clause tree</li> + * <li>a simple structure representing the "meaning" of the "leaf" of said dot-structure</li> + * </ul> + * <p/> + * The act of normalizing essentially refers to the process of dis-ambiguating + * node structures based on their context and creating a unified AST + * representation for different ways to express the same "idea". + * + * @author Joshua Davis + * @author Steve Ebersole + */ +class GeneratedHqlResolver extends TreeParser; + +options +{ + importVocab=Parse; + exportVocab=Resolve; + buildAST=true; +} + +tokens +{ + PROPERTY_REF; + ENTITY_PERSISTER_REF; + COLLECTION_PERSISTER_REF; + BOGUS; +} + + +// -- Declarations -- +{ + + // Statement node BEGIN/END handling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + protected void pushStatement(AST statementNode) { + } + + protected void popStatement() { + } + + + // implicit join context pushing/popping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + protected void pushExplicitJoinContext(AST joinType, AST fetch, AST alias, AST propertyFetch) { + } + + protected void popExplicitJoinContext() { + } + + // persister reference handling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + protected AST buildEntityPersisterReference(AST entityName, AST alias, AST propertyFetch) { + return null; + } + + protected void handleAdHocJoinNode(AST persisterReference, AST joinType, AST onClause) { + } + + + // property reference handling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + protected void handleIntermediatePathPart(AST name) { + } + + protected AST handleTerminalPathPart(AST name) { + return null; + } + +} + +// The main statement rule. 
+statement + : selectStatement | updateStatement | deleteStatement | insertStatement + ; + +// --- HQL statements --- + +selectStatement + : query + ; + +updateStatement + : #(UPDATE { pushStatement( #updateStatement ); } (VERSIONED)? fromClause setClause (whereClause)? { popStatement(); }) + ; + +deleteStatement + : #(DELETE { pushStatement( #deleteStatement ); } fromClause (whereClause)? { popStatement(); }) + ; + +insertStatement + : #(INSERT { pushStatement( #insertStatement ); } intoClause query { popStatement(); }) + ; + +query + : #(QUERY { pushStatement( #query ); } + // The first phase places the FROM first to make processing the SELECT simpler. + #( SELECT_FROM fromClause (selectClause)? ) + (whereClause)? + (groupClause)? + (orderClause)? { + popStatement(); + } + ) + ; + +// TODO : for now, just copy over the entire subtree +selectClause + : #(SELECT (subtree)* ) + ; + +// -- Language sub-elements -- + + +fromClause + : #( f:FROM range ( explicitJoin | range )* ) + ; + +range! + : #( RANGE e:entityPersisterReference ) { + #range = #e; + } + ; + +entityPersisterReference! + : en:ENTITY_NAME (a:ALIAS)? (pf:FETCH)? { + #entityPersisterReference = buildEntityPersisterReference( en, a, pf ); + } + ; + +explicitJoin! + : #(JOIN (jt:joinType)? joinRhs[jt] ) + ; + +joinRhs! [AST joinType] + : e:entityPersisterReference (on:ON)? { + handleAdHocJoinNode( #e, joinType, on ); + } + | (f:FETCH)? (a:ALIAS)? (pf:FETCH)? { pushExplicitJoinContext( joinType, #f, #a, #pf ); } prop:propertyPath (with:WITH)? { + popExplicitJoinContext(); + } + ; + +// TODO : still need to account for index operators in this series of rules... +propertyPath + : singlePartPropertyPath + | multiPartPropertyPath + ; + +singlePartPropertyPath! + : i:identifier { + #singlePartPropertyPath = handleTerminalPathPart( #i ); + } + ; + +multiPartPropertyPath! 
+ : #( d:DOT lhs:multiPartPropertyPathIntermediateStructure rhs:multiPartPropertyPathTerminus ) { + #multiPartPropertyPath = #rhs; + } + ; + +multiPartPropertyPathIntermediateStructure! + : i:identifier { + // this represents the "root" of the path expression + handleIntermediatePathPart( #i ); + } + | #( d:DOT lhs:multiPartPropertyPathIntermediateStructure rhs:propertyName ) { + handleIntermediatePathPart( #rhs ); + } + ; + +multiPartPropertyPathTerminus! + : p:propertyName { + #multiPartPropertyPathTerminus = handleTerminalPathPart( #p ); + } + ; + +// TODO : need to add cross joins +joinType + : ( (LEFT | RIGHT) (OUTER)? ) + | FULL + | INNER + ; + +intoClause + : #(i:INTO (subtree)* ) + ; + +whereClause + : #(WHERE logicalExpr ) + ; + +groupClause + : #(GROUP (subtree)* ) + ; + +orderClause + : #(ORDER (subtree)* ) + ; + +setClause + : #(SET (subtree)* ) + ; + +logicalExpr + : #(AND logicalExpr logicalExpr) + | #(OR logicalExpr logicalExpr) + | #(NOT logicalExpr) + | comparisonExpr + ; + +comparisonExpr + : + ( #(EQ exprOrSubquery exprOrSubquery) + | #(NE exprOrSubquery exprOrSubquery) + | #(LT exprOrSubquery exprOrSubquery) + | #(GT exprOrSubquery exprOrSubquery) + | #(LE exprOrSubquery exprOrSubquery) + | #(GE exprOrSubquery exprOrSubquery) + | #(LIKE exprOrSubquery expr ( #(ESCAPE expr) )? ) + | #(NOT_LIKE exprOrSubquery expr ( #(ESCAPE expr) )? 
) + | #(BETWEEN exprOrSubquery exprOrSubquery exprOrSubquery) + | #(NOT_BETWEEN exprOrSubquery exprOrSubquery exprOrSubquery) + | #(IN exprOrSubquery inRhs ) + | #(NOT_IN exprOrSubquery inRhs ) + | #(IS_NULL exprOrSubquery) + | #(IS_NOT_NULL exprOrSubquery) + | #(EXISTS ( expr | collectionFunctionOrSubselect ) ) + ) + ; + +inRhs + : #(IN_LIST ( collectionFunctionOrSubselect | ( (expr)* ) ) ) + ; + +exprOrSubquery + : expr + | query + | #(ANY collectionFunctionOrSubselect) + | #(ALL collectionFunctionOrSubselect) + | #(SOME collectionFunctionOrSubselect) + ; + +collectionFunctionOrSubselect + : collectionFunction + | query + ; + +collectionFunction + : #( ELEMENTS propertyRef ) + | #( INDICES propertyRef ) + ; + +count + : #(COUNT ( DISTINCT | ALL )? ( aggregateExpr | ROW_STAR ) ) + ; + +aggregateExpr + : expr + | collectionFunction + ; + +expr + : addrExpr + | #( VECTOR_EXPR (expr)* ) + | constant + | arithmeticExpr + | functionCall // Function call, not in the SELECT clause. + | parameter + | count // Count, not in the SELECT clause. + ; + +arithmeticExpr + : #(PLUS expr expr) + | #(MINUS expr expr) + | #(DIV expr expr) + | #(STAR expr expr) + | #(UNARY_MINUS expr) + | caseExpr + ; + +caseExpr + : #(CASE (#(WHEN logicalExpr expr))+ (#(ELSE expr))?) + | #(CASE2 expr (#(WHEN expr expr))+ (#(ELSE expr))?) + ; + +addrExpr + : propertyRef + | #(INDEX_OP addrExprLhs expr) + ; + +addrExprLhs + : addrExpr + ; + +constant + : literal + | NULL + | TRUE + | FALSE + ; + +literal + : NUM_INT + | NUM_LONG + | NUM_FLOAT + | NUM_DOUBLE + | QUOTED_STRING + ; + +parameter + : #(COLON identifier) + | #(PARAM (NUM_INT)?) + ; + +functionCall + : #(METHOD_CALL pathAsIdent ( #(EXPR_LIST (expr)* ) )? ) + | #(AGGREGATE aggregateExpr ) + ; + +propertyRef + : propertyPath + ; + +propertyName + : identifier + | CLASS + | ELEMENTS + | INDICES + ; + +// Matches a path and returns the normalized string for the path (usually +// a fully qualified class name). 
+pathAsString returns [String p] { + p = "???"; + String x = "?x?"; + } + : a:identifier { p = a.getText(); } + | #(DOT x=pathAsString y:identifier) { + StringBuffer buf = new StringBuffer(); + buf.append(x).append(".").append(y.getText()); + p = buf.toString(); + } + ; + +// Returns a path as a single identifier node. +pathAsIdent { + String text = "?text?"; + } + : text=pathAsString { + #pathAsIdent = #([IDENT,text]); + } + ; + +identifier + : (IDENT | WEIRD_IDENT) + ; + +// General subtree. Matches anything, copies the tree verbatim. +subtree + : #(. (subtree)*) + ; \ No newline at end of file |