pulling together some hints in this forum, I have created a new version of udf_median which works (seems to work) with MySQL 5.1.
Cheers,
Thomas Gutzmann
/* returns the median of the values in a distribution input parameters: data (real) number of decimals in result (int, optional) output: median value of the distribution (real) registering the function: CREATE AGGREGATE FUNCTION median RETURNS REAL SONAME 'udf_median.so'; getting rid of the function: DROP FUNCTION median;*/#ifdef STANDARD#include<stdio.h>#include<string.h>#ifdef __WIN__typedefunsigned__int64ulonglong;typedef__int64longlong;#elsetypedefunsignedlonglongulonglong;typedeflonglonglonglong;#endif /*__WIN__*/#else#include<my_global.h>#include<my_sys.h>#endif#include<mysql.h>#include<m_ctype.h>#include<m_string.h> #ifdef HAVE_DLOPEN#define BUFFERSIZE 1024 extern"C"{my_boolmedian_init(UDF_INIT*initid,UDF_ARGS*args,char*message);voidmedian_deinit(UDF_INIT*initid);voidmedian_clear(UDF_INIT*initid,char*is_null,char*error);voidmedian_reset(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);voidmedian_add(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);doublemedian(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);}structmedian_data{unsignedlongcount;unsignedlongabscount;unsignedlongpages;double*values;};my_boolmedian_init(UDF_INIT*initid,UDF_ARGS*args,char*message){if(args->arg_count<1||args->arg_count>2){strcpy(message,"wrong number of arguments: median() requires one or two arguments");return1;}if(args->arg_type[0]!=REAL_RESULT){strcpy(message,"median() requires a real as parameter 1");return1;}if(args->arg_count>1&&(args->arg_type[1]!=INT_RESULT)){strcpy(message,"median() requires an int as parameter 2");return1;}initid->decimals=2;if(args->arg_count==2&&(*((ulong*)args->args[1])<=16)){initid->decimals=*((ulong*)args->args[1]);}median_data*buffer=newmedian_data;buffer->count=0;buffer->abscount=0;buffer->pages=1;buffer->values=NULL;initid->maybe_null=1;initid->max_length=32;initid->ptr=(char*)buffer;return0;}voidmedian_deinit(UDF_INIT*initid){median_data*buffer=(median_data*)initid->ptr;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}deleteinitid->ptr;}voidmedian_clear(UDF_INIT*initid,char*is_null,char*message){median_data*buffer=(median_data*)initid->ptr;buffer->count=0;buffer->abscount=0;buffer->pages=1;*is_null=0;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}buffer->values=(double*)malloc(BUFFERSIZE*sizeof(double));}voidmedian_reset(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){median_data*buffer=(median_data*)initid->ptr;buffer->count=0;buffer->abscount=0;buffer->pages=1;*is_null=0;*is_error=0;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}buffer->values=(double*)malloc(BUFFERSIZE*sizeof(double));median_add(initid,args,is_null,is_error);}voidmedian_add(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){if(args->args[0]!=NULL){median_data*buffer=(median_data*)initid->ptr;if(buffer->count>=BUFFERSIZE){buffer->pages++;buffer->count=0;buffer->values=(double*)realloc(buffer->values,BUFFERSIZE*buffer->pages*sizeof(double));}buffer->values[buffer->abscount++]=*((double*)args->args[0]);buffer->count++;}}intcompare_doubles(constvoid*a,constvoid*b){constdouble*da=(constdouble*)a;constdouble*db=(constdouble*)b;return(*da>*db)-(*da<*db);}doublemedian(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){median_data*buffer=(median_data*)initid->ptr;if(buffer->abscount==0||*is_error!=0){*is_null=1;return0.0;}*is_null=0;if(buffer->abscount==1){returnbuffer->values[0];}qsort(buffer->values,buffer->abscount,sizeof(double),compare_doubles);if((buffer->abscount&1)==1){returnbuffer->values[(buffer->abscount-1)/2];}else{return(buffer->values[(buffer->abscount-2)/2]+buffer->values[buffer->abscount/2])/2.0;}}#endif
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
If there is any interest in a percentile function, I modified the median for this…
perc(val, dec, percentile) so perc(val, 3, 50) == median(val, 3);
Built with mysql-5.5.15
Tested on
5.1.41-3ubuntu12
/* returns the nth percentile of the values in a distribution based on linear regression model copied from median input parameters: data (real) number of decimals in result (int, optional) percentile (int, optional) [default=50] median output: perc value of the distribution (real) registering the function: CREATE AGGREGATE FUNCTION perc RETURNS REAL SONAME 'udf_perc.so'; getting rid of the function: DROP FUNCTION perc;*/#ifdef STANDARD#include<stdio.h>#include<string.h>#ifdef __WIN__typedefunsigned__int64ulonglong;typedef__int64longlong;#elsetypedefunsignedlonglongulonglong;typedeflonglonglonglong;#endif /*__WIN__*/#else#include<my_global.h>#include<my_sys.h>#endif#include<mysql.h>#include<m_ctype.h>#include<m_string.h> #ifdef HAVE_DLOPEN#define BUFFERSIZE 1024 extern"C"{my_boolperc_init(UDF_INIT*initid,UDF_ARGS*args,char*message);voidperc_deinit(UDF_INIT*initid);voidperc_reset(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);voidperc_add(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);doubleperc(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*error);voidperc_clear(UDF_INIT*initid,char*is_null,char*error);}structperc_data{unsignedlongpercentile;unsignedlongcount;unsignedlongabscount;unsignedlongpages;double*values;};voidperc_clear(UDF_INIT*initid,char*is_null,char*message){perc_data*buffer=(perc_data*)initid->ptr;buffer->count=0;buffer->abscount=0;buffer->pages=1;*is_null=0;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}buffer->values=(double*)malloc(BUFFERSIZE*sizeof(double));}my_boolperc_init(UDF_INIT*initid,UDF_ARGS*args,char*message){if(args->arg_count<1||args->arg_count>3){strcpy(message,"wrong number of arguments: perc(val, [decimals(2), [percentile(50)] ])");return1;}args->arg_type[0]=REAL_RESULT;if(args->arg_type[0]!=REAL_RESULT){strcpy(message,"perc() requires a real as parameter 1");return1;}if(args->arg_count>1&&(args->arg_type[1]!=INT_RESULT)){strcpy(message,"perc() requires an int as parameter 2");return1;}initid->decimals=2;if(args->arg_count>=2&&(*((ulong*)args->args[1])<=16)){initid->decimals=*((ulong*)args->args[1]);if(args->arg_count>=3&&(*((ulong*)args->args[2])<=100)){initid->extension=(void*)args->args[2];}}perc_data*buffer=newperc_data;buffer->count=0;buffer->abscount=0;buffer->pages=1;buffer->values=NULL;buffer->percentile=*((ulong*)initid->extension);initid->maybe_null=1;initid->max_length=32;initid->ptr=(char*)buffer;return0;}voidperc_deinit(UDF_INIT*initid){perc_data*buffer=(perc_data*)initid->ptr;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}deleteinitid->ptr;}voidperc_reset(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){perc_data*buffer=(perc_data*)initid->ptr;buffer->count=0;buffer->abscount=0;buffer->pages=1;*is_null=0;*is_error=0;if(buffer->values!=NULL){free(buffer->values);buffer->values=NULL;}buffer->values=(double*)malloc(BUFFERSIZE*sizeof(double));perc_add(initid,args,is_null,is_error);}voidperc_add(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){if(args->args[0]!=NULL){perc_data*buffer=(perc_data*)initid->ptr;if(buffer->count>=BUFFERSIZE){buffer->pages++;buffer->count=0;buffer->values=(double*)realloc(buffer->values,BUFFERSIZE*buffer->pages*sizeof(double));}buffer->values[buffer->abscount++]=*((double*)args->args[0]);buffer->count++;}}intcompare_doubles(constvoid*a,constvoid*b){constdouble*da=(constdouble*)a;constdouble*db=(constdouble*)b;return(*da>*db)-(*da<*db);}doubleperc(UDF_INIT*initid,UDF_ARGS*args,char*is_null,char*is_error){floatk,f,c;doubled0,d1;perc_data*buffer=(perc_data*)initid->ptr;if(buffer->abscount==0||*is_error!=0){*is_null=1;return0.0;}*is_null=0;if(buffer->abscount==1){returnbuffer->values[0];}qsort(buffer->values,buffer->abscount,sizeof(double),compare_doubles);k=((double)buffer->abscount*(double)buffer->percentile)/100.0;f=(float)((int)k);c=(float)((int)(k+0.5));if(f==c)returnbuffer->values[(int)k];else{// linear regresiond0=buffer->values[(int)f];d1=buffer->values[(int)c];return(d1-d0)*(k-f)+d0;}}#endif
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Hi,
pulling together some hints in this forum, I have created a new version of udf_median which works (seems to work) with MySQL 5.1.
Cheers,
Thomas Gutzmann
If there is any interest in a percentile function, I modified the median for this…
perc(val, dec, percentile) so perc(val, 3, 50) == median(val, 3);
Built with mysql-5.5.15
Tested on
5.1.41-3ubuntu12