From: <asf...@us...> - 2015-03-23 02:55:44
|
Revision: 61059 http://sourceforge.net/p/firebird/code/61059 Author: asfernandes Date: 2015-03-23 02:55:42 +0000 (Mon, 23 Mar 2015) Log Message: ----------- Feature CORE-4717 - Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR - contributed by Hajime Nakagami. Modified Paths: -------------- firebird/trunk/doc/WhatsNew firebird/trunk/doc/sql.extensions/README.statistical_functions.txt firebird/trunk/src/dsql/AggNodes.cpp firebird/trunk/src/dsql/AggNodes.h firebird/trunk/src/dsql/parse.y firebird/trunk/src/jrd/blp.h firebird/trunk/src/jrd/blr.h firebird/trunk/src/yvalve/keywords.cpp Modified: firebird/trunk/doc/WhatsNew =================================================================== --- firebird/trunk/doc/WhatsNew 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/doc/WhatsNew 2015-03-23 02:55:42 UTC (rev 61059) @@ -291,6 +291,14 @@ Contributor(s): Alex Peshkov <peshkoff at mail.ru> + * New feature CORE-4717 + Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR + See also: + /doc/sql.extensions/README.statistical_functions.txt + Contributor(s): + Hajime Nakagami <nakagami at gmail.com> + Adriano dos Santos Fernandes <adrianosf at gmail.com> + * New feature CORE-4714 Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP See also: Modified: firebird/trunk/doc/sql.extensions/README.statistical_functions.txt =================================================================== --- firebird/trunk/doc/sql.extensions/README.statistical_functions.txt 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/doc/sql.extensions/README.statistical_functions.txt 2015-03-23 02:55:42 UTC (rev 61059) @@ -5,22 +5,40 @@ By the SQL specification, some statistical functions are defined. Function about variance and standard deviation are bellow. +VAR_SAMP: return the sample variance. + eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1) + VAR_POP: return the population variance. -VAR_SAMP: return the sample variance. -STDDEV_SAMP: return the sample standard deviation . + eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>) + +STDDEV_SAMP: return the sample standard deviation. + eq. SQRT(VAR_SAMP(<expr)) + STDDEV_POP: return the population standard deviation. + eq. SQRT(VAR_POP(<expr>)) -VAR_POP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>). -VAR_SAMP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1). -STDDEV_POP(<expr>) is equivalent to SQRT(VAR_POP(<expr>)). -STDDEV_SAMP(<expr>) is equivalent to SQRT(VAR_SAMP(<expr)). +COVAR_SAMP: return the sample population. + eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / (COUNT(*) - 1) +COVAR_POP: return the population covariance. + eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / COUNT(*) + +CORR: returns the coefficient of correlation. + eq. COVAR_POP(<expr1>, <expr2>) / (STDDEV_POP(<expr2>) * STDDEV_POP(<expr1>)) + Author: Hajime Nakagami <nak...@gm...> Syntax: - <statistical function> ::= <statistical function name>(<expr>) - <statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP } + <single param statistical function> ::= <single param statistical function name>(<expr>) + <single param statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP } + <dual param statistical function> ::= <dual param statistical function name>(<expr1>, <expr2>) + <dual param statistical function name> := { COVAR_POP | COVAR_SAMP | CORR } + +Note: + If VAR_SAMP, STDDEV_SAMP, COVAR_SAMP and result count is 0 or 1, return NULL. + If VAR_POP, STDDEV_POP, COVAR_POP, CORR and result count is 0, return NULL. + Example: SELECT STDDEV_SAMP(salary) FROM employees; Modified: firebird/trunk/src/dsql/AggNodes.cpp =================================================================== --- firebird/trunk/src/dsql/AggNodes.cpp 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/dsql/AggNodes.cpp 2015-03-23 02:55:42 UTC (rev 61059) @@ -1313,4 +1313,179 @@ } +//-------------------- + + +static AggNode::Register<CorrAggNode> coVarSampAggInfo("COVAR_SAMP", blr_agg_covar_samp); +static AggNode::Register<CorrAggNode> coVarPopAggInfo("COVAR_POP", blr_agg_covar_pop); +static AggNode::Register<CorrAggNode> corrAggInfo("CORR", blr_agg_corr); + +CorrAggNode::CorrAggNode(MemoryPool& pool, CorrType aType, ValueExprNode* aArg, ValueExprNode* aArg2) + : AggNode(pool, + (aType == CorrAggNode::TYPE_COVAR_SAMP ? coVarSampAggInfo : + aType == CorrAggNode::TYPE_COVAR_POP ? coVarPopAggInfo : + corrAggInfo), + false, false, aArg), + type(aType), + arg2(aArg2), + impure2Offset(0) +{ + addChildNode(arg2, arg2); +} + +void CorrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb) +{ + AggNode::aggPostRse(tdbb, csb); + impure2Offset = CMP_impure(csb, sizeof(CorrImpure)); +} + +DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp) +{ + CorrType type; + + switch (blrOp) + { + case blr_agg_covar_samp: + type = TYPE_COVAR_SAMP; + break; + + case blr_agg_covar_pop: + type = TYPE_COVAR_POP; + break; + + case blr_agg_corr: + type = TYPE_CORR; + break; + + default: + fb_assert(false); + } + + ValueExprNode* a1 = PAR_parse_value(tdbb, csb); + ValueExprNode* a2 = PAR_parse_value(tdbb, csb); + return FB_NEW(pool) CorrAggNode(pool, type, a1, a2); +} + +void CorrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc) +{ + desc->makeDouble(); + desc->setNullable(true); +} + +void CorrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) +{ + desc->makeDouble(); +} + +ValueExprNode* CorrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const +{ + CorrAggNode* node = FB_NEW(*tdbb->getDefaultPool()) CorrAggNode(*tdbb->getDefaultPool(), type); + node->nodScale = nodScale; + node->arg = copier.copy(tdbb, arg); + node->arg2 = copier.copy(tdbb, arg2); + return node; +} + +void CorrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const +{ + AggNode::aggInit(tdbb, request); + + impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset); + impure->make_double(0); + + CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset); + impure2->x = impure2->x2 = impure2->y = impure2->y2 = impure2->xy = 0.0; +} + +bool CorrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const +{ + dsc* desc = NULL; + dsc* desc2 = NULL; + + desc = EVL_expr(tdbb, request, arg); + if (request->req_flags & req_null) + return false; + + desc2 = EVL_expr(tdbb, request, arg2); + if (request->req_flags & req_null) + return false; + + impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset); + ++impure->vlux_count; + + const double y = MOV_get_double(desc); + const double x = MOV_get_double(desc2); + + CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset); + impure2->x += x; + impure2->x2 += x * x; + impure2->y += y; + impure2->y2 += y * y; + impure2->xy += x * y; + + return true; +} + +void CorrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const +{ + fb_assert(false); +} + +dsc* CorrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const +{ + impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset); + CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset); + double d; + + switch (type) + { + case TYPE_COVAR_SAMP: + if (impure->vlux_count < 2) + return NULL; + d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / (impure->vlux_count - 1); + break; + + case TYPE_COVAR_POP: + if (impure->vlux_count == 0) + return NULL; + d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / impure->vlux_count; + break; + + case TYPE_CORR: + { + // COVAR_POP(Y, X) / (STDDEV_POP(X) * STDDEV_POP(Y)) + if (impure->vlux_count == 0) + return NULL; + + const double covarPop = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / + impure->vlux_count; + const double varPopX = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) / + impure->vlux_count; + const double varPopY = (impure2->y2 - impure2->y * impure2->y / impure->vlux_count) / + impure->vlux_count; + const double divisor = sqrt(varPopX) * sqrt(varPopY); + + if (divisor == 0.0) + return NULL; + + d = covarPop / divisor; + break; + } + } + + dsc temp; + temp.makeDouble(&d); + + EVL_make_value(tdbb, &temp, impure); + + return &impure->vlu_desc; +} + +AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ +{ + return FB_NEW(getPool()) CorrAggNode(getPool(), type, + doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2)); +} + + } // namespace Jrd Modified: firebird/trunk/src/dsql/AggNodes.h =================================================================== --- firebird/trunk/src/dsql/AggNodes.h 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/dsql/AggNodes.h 2015-03-23 02:55:42 UTC (rev 61059) @@ -192,6 +192,48 @@ ULONG impure2Offset; }; +class CorrAggNode : public AggNode +{ +public: + enum CorrType + { + TYPE_COVAR_SAMP, + TYPE_COVAR_POP, + TYPE_CORR + }; + + struct CorrImpure + { + double x, x2, y, y2, xy; + }; + + explicit CorrAggNode(MemoryPool& pool, CorrType aType, + ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL); + + virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb); + + static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp); + + virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc); + virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc); + virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const; + + virtual void aggInit(thread_db* tdbb, jrd_req* request) const; + virtual bool aggPass(thread_db* tdbb, jrd_req* request) const; + virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const; + virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const; + +protected: + virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/; + +public: + const CorrType type; + NestConst<ValueExprNode> arg2; + +private: + ULONG impure2Offset; +}; + } // namespace #endif // DSQL_AGG_NODES_H Modified: firebird/trunk/src/dsql/parse.y =================================================================== --- firebird/trunk/src/dsql/parse.y 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/dsql/parse.y 2015-03-23 02:55:42 UTC (rev 61059) @@ -576,6 +576,9 @@ %token <metaNamePtr> STDDEV_POP %token <metaNamePtr> VAR_SAMP %token <metaNamePtr> VAR_POP +%token <metaNamePtr> COVAR_SAMP +%token <metaNamePtr> COVAR_POP +%token <metaNamePtr> CORR // precedence declarations for expression evaluation @@ -3814,6 +3817,9 @@ | START | SIMILAR // added in FB 2.5 | KW_BOOLEAN // added in FB 3.0 + | CORR + | COVAR_POP + | COVAR_SAMP | DETERMINISTIC | KW_FALSE | OFFSET @@ -6805,6 +6811,12 @@ { $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); } | VAR_POP '(' value ')' { $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); } + | COVAR_SAMP '(' value ',' value ')' + { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_SAMP, $3, $5); } + | COVAR_POP '(' value ',' value ')' + { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); } + | CORR '(' value ',' value ')' + { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); } ; %type <aggNode> window_function Modified: firebird/trunk/src/jrd/blp.h =================================================================== --- firebird/trunk/src/jrd/blp.h 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/jrd/blp.h 2015-03-23 02:55:42 UTC (rev 61059) @@ -245,5 +245,8 @@ {"agg_stddev_pop", one}, {"agg_var_samp", one}, {"agg_var_pop", one}, + {"agg_covar_samp", two}, + {"agg_covar_pop", two}, + {"agg_corr", two}, {0, 0} }; Modified: firebird/trunk/src/jrd/blr.h =================================================================== --- firebird/trunk/src/jrd/blr.h 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/jrd/blr.h 2015-03-23 02:55:42 UTC (rev 61059) @@ -411,5 +411,8 @@ #define blr_agg_stddev_pop (unsigned char) 212 #define blr_agg_var_samp (unsigned char) 213 #define blr_agg_var_pop (unsigned char) 214 +#define blr_agg_covar_samp (unsigned char) 215 +#define blr_agg_covar_pop (unsigned char) 216 +#define blr_agg_corr (unsigned char) 217 #endif // JRD_BLR_H Modified: firebird/trunk/src/yvalve/keywords.cpp =================================================================== --- firebird/trunk/src/yvalve/keywords.cpp 2015-03-23 00:20:24 UTC (rev 61058) +++ firebird/trunk/src/yvalve/keywords.cpp 2015-03-23 02:55:42 UTC (rev 61059) @@ -129,10 +129,13 @@ {CONSTRAINT, "CONSTRAINT", 1, false}, {CONTAINING, "CONTAINING", 1, false}, {CONTINUE, "CONTINUE", 2, true}, + {CORR, "CORR", 2, false}, {COS, "COS", 2, false}, {COSH, "COSH", 2, false}, {COT, "COT", 2, false}, {COUNT, "COUNT", 1, false}, + {COVAR_POP, "COVAR_POP", 2, false}, + {COVAR_SAMP, "COVAR_SAMP", 2, false}, {CREATE, "CREATE", 1, false}, {CROSS, "CROSS", 2, false}, {CSTRING, "CSTRING", 1, false}, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |