|
From: <asf...@us...> - 2015-03-23 02:55:44
|
Revision: 61059
http://sourceforge.net/p/firebird/code/61059
Author: asfernandes
Date: 2015-03-23 02:55:42 +0000 (Mon, 23 Mar 2015)
Log Message:
-----------
Feature CORE-4717 - Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR - contributed by Hajime Nakagami.
Modified Paths:
--------------
firebird/trunk/doc/WhatsNew
firebird/trunk/doc/sql.extensions/README.statistical_functions.txt
firebird/trunk/src/dsql/AggNodes.cpp
firebird/trunk/src/dsql/AggNodes.h
firebird/trunk/src/dsql/parse.y
firebird/trunk/src/jrd/blp.h
firebird/trunk/src/jrd/blr.h
firebird/trunk/src/yvalve/keywords.cpp
Modified: firebird/trunk/doc/WhatsNew
===================================================================
--- firebird/trunk/doc/WhatsNew 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/doc/WhatsNew 2015-03-23 02:55:42 UTC (rev 61059)
@@ -291,6 +291,14 @@
Contributor(s):
Alex Peshkov <peshkoff at mail.ru>
+ * New feature CORE-4717
+ Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR
+ See also:
+ /doc/sql.extensions/README.statistical_functions.txt
+ Contributor(s):
+ Hajime Nakagami <nakagami at gmail.com>
+ Adriano dos Santos Fernandes <adrianosf at gmail.com>
+
* New feature CORE-4714
Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP
See also:
Modified: firebird/trunk/doc/sql.extensions/README.statistical_functions.txt
===================================================================
--- firebird/trunk/doc/sql.extensions/README.statistical_functions.txt 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/doc/sql.extensions/README.statistical_functions.txt 2015-03-23 02:55:42 UTC (rev 61059)
@@ -5,22 +5,40 @@
By the SQL specification, some statistical functions are defined.
Function about variance and standard deviation are bellow.
+VAR_SAMP: return the sample variance.
+ eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1)
+
VAR_POP: return the population variance.
-VAR_SAMP: return the sample variance.
-STDDEV_SAMP: return the sample standard deviation .
+ eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>)
+
+STDDEV_SAMP: return the sample standard deviation.
+ eq. SQRT(VAR_SAMP(<expr))
+
STDDEV_POP: return the population standard deviation.
+ eq. SQRT(VAR_POP(<expr>))
-VAR_POP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>).
-VAR_SAMP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1).
-STDDEV_POP(<expr>) is equivalent to SQRT(VAR_POP(<expr>)).
-STDDEV_SAMP(<expr>) is equivalent to SQRT(VAR_SAMP(<expr)).
+COVAR_SAMP: return the sample population.
+ eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / (COUNT(*) - 1)
+COVAR_POP: return the population covariance.
+ eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / COUNT(*)
+
+CORR: returns the coefficient of correlation.
+ eq. COVAR_POP(<expr1>, <expr2>) / (STDDEV_POP(<expr2>) * STDDEV_POP(<expr1>))
+
Author:
Hajime Nakagami <nak...@gm...>
Syntax:
- <statistical function> ::= <statistical function name>(<expr>)
- <statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
+ <single param statistical function> ::= <single param statistical function name>(<expr>)
+ <single param statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
+ <dual param statistical function> ::= <dual param statistical function name>(<expr1>, <expr2>)
+ <dual param statistical function name> := { COVAR_POP | COVAR_SAMP | CORR }
+
+Note:
+ If VAR_SAMP, STDDEV_SAMP, COVAR_SAMP and result count is 0 or 1, return NULL.
+ If VAR_POP, STDDEV_POP, COVAR_POP, CORR and result count is 0, return NULL.
+
Example:
SELECT STDDEV_SAMP(salary) FROM employees;
Modified: firebird/trunk/src/dsql/AggNodes.cpp
===================================================================
--- firebird/trunk/src/dsql/AggNodes.cpp 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/dsql/AggNodes.cpp 2015-03-23 02:55:42 UTC (rev 61059)
@@ -1313,4 +1313,179 @@
}
+//--------------------
+
+
+static AggNode::Register<CorrAggNode> coVarSampAggInfo("COVAR_SAMP", blr_agg_covar_samp);
+static AggNode::Register<CorrAggNode> coVarPopAggInfo("COVAR_POP", blr_agg_covar_pop);
+static AggNode::Register<CorrAggNode> corrAggInfo("CORR", blr_agg_corr);
+
+CorrAggNode::CorrAggNode(MemoryPool& pool, CorrType aType, ValueExprNode* aArg, ValueExprNode* aArg2)
+ : AggNode(pool,
+ (aType == CorrAggNode::TYPE_COVAR_SAMP ? coVarSampAggInfo :
+ aType == CorrAggNode::TYPE_COVAR_POP ? coVarPopAggInfo :
+ corrAggInfo),
+ false, false, aArg),
+ type(aType),
+ arg2(aArg2),
+ impure2Offset(0)
+{
+ addChildNode(arg2, arg2);
+}
+
+void CorrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb)
+{
+ AggNode::aggPostRse(tdbb, csb);
+ impure2Offset = CMP_impure(csb, sizeof(CorrImpure));
+}
+
+DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
+{
+ CorrType type;
+
+ switch (blrOp)
+ {
+ case blr_agg_covar_samp:
+ type = TYPE_COVAR_SAMP;
+ break;
+
+ case blr_agg_covar_pop:
+ type = TYPE_COVAR_POP;
+ break;
+
+ case blr_agg_corr:
+ type = TYPE_CORR;
+ break;
+
+ default:
+ fb_assert(false);
+ }
+
+ ValueExprNode* a1 = PAR_parse_value(tdbb, csb);
+ ValueExprNode* a2 = PAR_parse_value(tdbb, csb);
+ return FB_NEW(pool) CorrAggNode(pool, type, a1, a2);
+}
+
+void CorrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
+{
+ desc->makeDouble();
+ desc->setNullable(true);
+}
+
+void CorrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
+{
+ desc->makeDouble();
+}
+
+ValueExprNode* CorrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
+{
+ CorrAggNode* node = FB_NEW(*tdbb->getDefaultPool()) CorrAggNode(*tdbb->getDefaultPool(), type);
+ node->nodScale = nodScale;
+ node->arg = copier.copy(tdbb, arg);
+ node->arg2 = copier.copy(tdbb, arg2);
+ return node;
+}
+
+void CorrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
+{
+ AggNode::aggInit(tdbb, request);
+
+ impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
+ impure->make_double(0);
+
+ CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset);
+ impure2->x = impure2->x2 = impure2->y = impure2->y2 = impure2->xy = 0.0;
+}
+
+bool CorrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const
+{
+ dsc* desc = NULL;
+ dsc* desc2 = NULL;
+
+ desc = EVL_expr(tdbb, request, arg);
+ if (request->req_flags & req_null)
+ return false;
+
+ desc2 = EVL_expr(tdbb, request, arg2);
+ if (request->req_flags & req_null)
+ return false;
+
+ impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
+ ++impure->vlux_count;
+
+ const double y = MOV_get_double(desc);
+ const double x = MOV_get_double(desc2);
+
+ CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset);
+ impure2->x += x;
+ impure2->x2 += x * x;
+ impure2->y += y;
+ impure2->y2 += y * y;
+ impure2->xy += x * y;
+
+ return true;
+}
+
+void CorrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const
+{
+ fb_assert(false);
+}
+
+dsc* CorrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const
+{
+ impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
+ CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset);
+ double d;
+
+ switch (type)
+ {
+ case TYPE_COVAR_SAMP:
+ if (impure->vlux_count < 2)
+ return NULL;
+ d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / (impure->vlux_count - 1);
+ break;
+
+ case TYPE_COVAR_POP:
+ if (impure->vlux_count == 0)
+ return NULL;
+ d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / impure->vlux_count;
+ break;
+
+ case TYPE_CORR:
+ {
+ // COVAR_POP(Y, X) / (STDDEV_POP(X) * STDDEV_POP(Y))
+ if (impure->vlux_count == 0)
+ return NULL;
+
+ const double covarPop = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) /
+ impure->vlux_count;
+ const double varPopX = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) /
+ impure->vlux_count;
+ const double varPopY = (impure2->y2 - impure2->y * impure2->y / impure->vlux_count) /
+ impure->vlux_count;
+ const double divisor = sqrt(varPopX) * sqrt(varPopY);
+
+ if (divisor == 0.0)
+ return NULL;
+
+ d = covarPop / divisor;
+ break;
+ }
+ }
+
+ dsc temp;
+ temp.makeDouble(&d);
+
+ EVL_make_value(tdbb, &temp, impure);
+
+ return &impure->vlu_desc;
+}
+
+AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
+{
+ return FB_NEW(getPool()) CorrAggNode(getPool(), type,
+ doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2));
+}
+
+
} // namespace Jrd
Modified: firebird/trunk/src/dsql/AggNodes.h
===================================================================
--- firebird/trunk/src/dsql/AggNodes.h 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/dsql/AggNodes.h 2015-03-23 02:55:42 UTC (rev 61059)
@@ -192,6 +192,48 @@
ULONG impure2Offset;
};
+class CorrAggNode : public AggNode
+{
+public:
+ enum CorrType
+ {
+ TYPE_COVAR_SAMP,
+ TYPE_COVAR_POP,
+ TYPE_CORR
+ };
+
+ struct CorrImpure
+ {
+ double x, x2, y, y2, xy;
+ };
+
+ explicit CorrAggNode(MemoryPool& pool, CorrType aType,
+ ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL);
+
+ virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb);
+
+ static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
+
+ virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
+ virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
+ virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
+
+ virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
+ virtual bool aggPass(thread_db* tdbb, jrd_req* request) const;
+ virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
+ virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
+
+protected:
+ virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
+
+public:
+ const CorrType type;
+ NestConst<ValueExprNode> arg2;
+
+private:
+ ULONG impure2Offset;
+};
+
} // namespace
#endif // DSQL_AGG_NODES_H
Modified: firebird/trunk/src/dsql/parse.y
===================================================================
--- firebird/trunk/src/dsql/parse.y 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/dsql/parse.y 2015-03-23 02:55:42 UTC (rev 61059)
@@ -576,6 +576,9 @@
%token <metaNamePtr> STDDEV_POP
%token <metaNamePtr> VAR_SAMP
%token <metaNamePtr> VAR_POP
+%token <metaNamePtr> COVAR_SAMP
+%token <metaNamePtr> COVAR_POP
+%token <metaNamePtr> CORR
// precedence declarations for expression evaluation
@@ -3814,6 +3817,9 @@
| START
| SIMILAR // added in FB 2.5
| KW_BOOLEAN // added in FB 3.0
+ | CORR
+ | COVAR_POP
+ | COVAR_SAMP
| DETERMINISTIC
| KW_FALSE
| OFFSET
@@ -6805,6 +6811,12 @@
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); }
| VAR_POP '(' value ')'
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); }
+ | COVAR_SAMP '(' value ',' value ')'
+ { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_SAMP, $3, $5); }
+ | COVAR_POP '(' value ',' value ')'
+ { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); }
+ | CORR '(' value ',' value ')'
+ { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); }
;
%type <aggNode> window_function
Modified: firebird/trunk/src/jrd/blp.h
===================================================================
--- firebird/trunk/src/jrd/blp.h 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/jrd/blp.h 2015-03-23 02:55:42 UTC (rev 61059)
@@ -245,5 +245,8 @@
{"agg_stddev_pop", one},
{"agg_var_samp", one},
{"agg_var_pop", one},
+ {"agg_covar_samp", two},
+ {"agg_covar_pop", two},
+ {"agg_corr", two},
{0, 0}
};
Modified: firebird/trunk/src/jrd/blr.h
===================================================================
--- firebird/trunk/src/jrd/blr.h 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/jrd/blr.h 2015-03-23 02:55:42 UTC (rev 61059)
@@ -411,5 +411,8 @@
#define blr_agg_stddev_pop (unsigned char) 212
#define blr_agg_var_samp (unsigned char) 213
#define blr_agg_var_pop (unsigned char) 214
+#define blr_agg_covar_samp (unsigned char) 215
+#define blr_agg_covar_pop (unsigned char) 216
+#define blr_agg_corr (unsigned char) 217
#endif // JRD_BLR_H
Modified: firebird/trunk/src/yvalve/keywords.cpp
===================================================================
--- firebird/trunk/src/yvalve/keywords.cpp 2015-03-23 00:20:24 UTC (rev 61058)
+++ firebird/trunk/src/yvalve/keywords.cpp 2015-03-23 02:55:42 UTC (rev 61059)
@@ -129,10 +129,13 @@
{CONSTRAINT, "CONSTRAINT", 1, false},
{CONTAINING, "CONTAINING", 1, false},
{CONTINUE, "CONTINUE", 2, true},
+ {CORR, "CORR", 2, false},
{COS, "COS", 2, false},
{COSH, "COSH", 2, false},
{COT, "COT", 2, false},
{COUNT, "COUNT", 1, false},
+ {COVAR_POP, "COVAR_POP", 2, false},
+ {COVAR_SAMP, "COVAR_SAMP", 2, false},
{CREATE, "CREATE", 1, false},
{CROSS, "CROSS", 2, false},
{CSTRING, "CSTRING", 1, false},
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|