Menu

Working version for udf_median

PERF::ACT
2010-05-05
2013-04-15
  • PERF::ACT

    PERF::ACT - 2010-05-05

    Hi,

    pulling together some hints in this forum, I have created a new version of udf_median which works (seems to work) with MySQL 5.1.

    Cheers,

    Thomas Gutzmann

    /*
      returns the median of the values in a distribution 
      input parameters:
      data (real)
      number of decimals in result (int, optional)
      output:
      median value of the distribution (real)
      registering the function:
      CREATE AGGREGATE FUNCTION median RETURNS REAL SONAME 'udf_median.so';
      getting rid of the function:
      DROP FUNCTION median;
    */
    #ifdef STANDARD
    #include <stdio.h>
    #include <string.h>
    #ifdef __WIN__
    typedef unsigned __int64 ulonglong; 
    typedef __int64 longlong;
    #else
    typedef unsigned long long ulonglong;
    typedef long long longlong;
    #endif /*__WIN__*/
    #else
    #include <my_global.h>
    #include <my_sys.h>
    #endif
    #include <mysql.h>
    #include <m_ctype.h>
    #include <m_string.h>       
    #ifdef HAVE_DLOPEN
    #define BUFFERSIZE 1024 
    extern "C" {
    my_bool median_init( UDF_INIT* initid, UDF_ARGS* args, char* message );
    void median_deinit( UDF_INIT* initid );
    void median_clear( UDF_INIT* initid, char* is_null, char *error );
    void median_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    void median_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    double median( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    }
    struct median_data
    {
      unsigned long count;
      unsigned long abscount;
      unsigned long pages;
      double *values;
    };
    my_bool median_init( UDF_INIT* initid, UDF_ARGS* args, char* message )
    {
      if (args->arg_count < 1 || args->arg_count>2)
      {
        strcpy(message,"wrong number of arguments: median() requires one or two arguments");
        return 1;
      }
      if (args->arg_type[0]!=REAL_RESULT)
      {
        strcpy(message,"median() requires a real as parameter 1");
        return 1;
      }
      if (args->arg_count>1 && (args->arg_type[1]!=INT_RESULT))
      {
        strcpy(message,"median() requires an int as parameter 2");
        return 1;
      }
      initid->decimals=2;
      if (args->arg_count==2 && (*((ulong*)args->args[1])<=16))
      {
        initid->decimals=*((ulong*)args->args[1]);
      }
      median_data *buffer = new median_data;
      buffer->count = 0;
      buffer->abscount=0;
      buffer->pages = 1;
      buffer->values = NULL;
      initid->maybe_null    = 1;
      initid->max_length    = 32;
      initid->ptr = (char*)buffer;
      return 0;
    }
    void median_deinit( UDF_INIT* initid )
    {
      median_data *buffer = (median_data*)initid->ptr;
      if (buffer->values != NULL)
      {
        free(buffer->values);
        buffer->values=NULL;
      }
      delete initid->ptr;
    }
    void median_clear(UDF_INIT* initid, char* is_null, char* message)
    {
      median_data *buffer = (median_data*)initid->ptr;
      buffer->count = 0;
      buffer->abscount=0;
      buffer->pages = 1;
      *is_null = 0;
      
      if (buffer->values != NULL)
      {
        free(buffer->values);
        buffer->values=NULL;
      }
      
      buffer->values=(double *) malloc(BUFFERSIZE*sizeof(double));
    }
    void median_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
      median_data *buffer = (median_data*)initid->ptr;
      buffer->count = 0;
      buffer->abscount=0;
      buffer->pages = 1;
      *is_null = 0;
      *is_error = 0;
      if (buffer->values != NULL)
      {
        free(buffer->values);
        buffer->values=NULL;
      }
      buffer->values=(double *) malloc(BUFFERSIZE*sizeof(double));
      median_add( initid, args, is_null, is_error );
    }
    void median_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
      if (args->args[0]!=NULL)
      {
        median_data *buffer = (median_data*)initid->ptr;
        if (buffer->count>=BUFFERSIZE)
        {
          buffer->pages++;
          buffer->count=0;
          buffer->values=(double *) realloc(buffer->values,BUFFERSIZE*buffer->pages*sizeof(double));
        }
        buffer->values[buffer->abscount++] = *((double*)args->args[0]);
        buffer->count++;
      }
    }
    int
    compare_doubles (const void *a, const void *b)
    {
      const double *da = (const double *) a;
      const double *db = (const double *) b;
      return (*da > *db) - (*da < *db);
    }
    double median( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
      median_data* buffer = (median_data*)initid->ptr;
      if (buffer->abscount==0 || *is_error!=0)
      {
        *is_null = 1;
        return 0.0;
      }
      *is_null=0;
      if (buffer->abscount==1)
      {
        return buffer->values[0];
      }
      qsort(buffer->values,buffer->abscount,sizeof(double),compare_doubles);
      if ((buffer->abscount&1)==1) 
      {
        return buffer->values[(buffer->abscount-1)/2];
      } else
      {
        return (buffer->values[(buffer->abscount-2)/2]+buffer->values[buffer->abscount/2])/2.0;
      }
    }
    #endif
    
     
  • Bob Newland

    Bob Newland - 2011-09-16

    If there is any interest in a percentile function, I modified the median for this…
    perc(val, dec, percentile) so perc(val, 3, 50) == median(val, 3);
    Built with mysql-5.5.15
    Tested on
    5.1.41-3ubuntu12

    /*
        returns the nth percentile of the values in a distribution based on linear regression model
        copied from median
        input parameters:
        data (real)
        number of decimals in result (int, optional)
        percentile (int, optional) [default=50] median
        output:
        perc value of the distribution (real)
        registering the function:
        CREATE AGGREGATE FUNCTION perc RETURNS REAL SONAME 'udf_perc.so';
        getting rid of the function:
        DROP FUNCTION perc;
    */
    #ifdef STANDARD
    #include <stdio.h>
    #include <string.h>
    #ifdef __WIN__
    typedef unsigned __int64 ulonglong; 
    typedef __int64 longlong;
    #else
    typedef unsigned long long ulonglong;
    typedef long long longlong;
    #endif /*__WIN__*/
    #else
    #include <my_global.h>
    #include <my_sys.h>
    #endif
    #include <mysql.h>
    #include <m_ctype.h>
    #include <m_string.h>       
    #ifdef HAVE_DLOPEN
    #define BUFFERSIZE 1024 
    extern "C" {
    my_bool perc_init( UDF_INIT* initid, UDF_ARGS* args, char* message );
    void perc_deinit( UDF_INIT* initid );
    void perc_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    void perc_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    double perc( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
    void perc_clear( UDF_INIT* initid, char* is_null, char *error );
    }
    struct perc_data
    {
        unsigned long percentile;
        unsigned long count;
        unsigned long abscount;
        unsigned long pages;
        double *values;
    };
    void perc_clear(UDF_INIT* initid, char* is_null, char* message)
    {
        perc_data *buffer = (perc_data*)initid->ptr;
        buffer->count = 0;
        buffer->abscount=0;
        buffer->pages = 1;
        *is_null = 0;
        if (buffer->values != NULL)
        {
            free(buffer->values);
            buffer->values=NULL;
        }
        buffer->values=(double *) malloc(BUFFERSIZE*sizeof(double));
    }
    my_bool perc_init( UDF_INIT* initid, UDF_ARGS* args, char* message )
    {
        if (args->arg_count < 1 || args->arg_count>3)
        {
            strcpy(message,"wrong number of arguments: perc(val, [decimals(2), [percentile(50)] ])");
            return 1;
        }
        args->arg_type[0]=REAL_RESULT;
        if (args->arg_type[0]!=REAL_RESULT)
        {
            strcpy(message,"perc() requires a real as parameter 1");
            return 1;
        }
        if (args->arg_count>1 && (args->arg_type[1]!=INT_RESULT))
        {
            strcpy(message,"perc() requires an int as parameter 2");
            return 1;
        }
        initid->decimals=2;
        if (args->arg_count>=2 && (*((ulong*)args->args[1])<=16))
        {
            initid->decimals=*((ulong*)args->args[1]);
            if (args->arg_count>=3 && (*((ulong*)args->args[2])<=100))
            {
                initid->extension = (void *)args->args[2];
            }   
        }
        perc_data *buffer = new perc_data;
        buffer->count = 0;
        buffer->abscount=0;
        buffer->pages = 1;
        buffer->values = NULL;
        buffer->percentile = *((ulong*)initid->extension);
        initid->maybe_null  = 1;
        initid->max_length  = 32;
        initid->ptr = (char*)buffer;
        return 0;
    }
    void perc_deinit( UDF_INIT* initid )
    {
        perc_data *buffer = (perc_data*)initid->ptr;
        if (buffer->values != NULL)
        {
            free(buffer->values);
            buffer->values=NULL;
        }
        delete initid->ptr;
    }
    void perc_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
        perc_data *buffer = (perc_data*)initid->ptr;
        buffer->count = 0;
        buffer->abscount=0;
        buffer->pages = 1;
        *is_null = 0;
        *is_error = 0;
        if (buffer->values != NULL)
        {
            free(buffer->values);
            buffer->values=NULL;
        }
        buffer->values=(double *) malloc(BUFFERSIZE*sizeof(double));
        perc_add( initid, args, is_null, is_error );
    }
    void perc_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
        if (args->args[0]!=NULL)
        {
            perc_data *buffer = (perc_data*)initid->ptr;
            if (buffer->count>=BUFFERSIZE)
            {
                buffer->pages++;
                buffer->count=0;
                buffer->values=(double *) realloc(buffer->values,BUFFERSIZE*buffer->pages*sizeof(double));
            }
            buffer->values[buffer->abscount++] = *((double*)args->args[0]);
            buffer->count++;
        }
    }
    int
    compare_doubles (const void *a, const void *b)
    {
        const double *da = (const double *) a;
        const double *db = (const double *) b;
        return (*da > *db) - (*da < *db);
    }
    double perc( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
    {
        float k, f, c;
        double d0, d1;
        perc_data* buffer = (perc_data*)initid->ptr;
        if (buffer->abscount==0 || *is_error!=0)
        {
            *is_null = 1;
            return 0.0;
        }
        *is_null=0;
        if (buffer->abscount==1)
        {
            return buffer->values[0];
        }
        qsort(buffer->values,buffer->abscount,sizeof(double),compare_doubles);
        k = ((double)buffer->abscount * (double)buffer->percentile) / 100.0;
        f = (float)((int)k);
        c = (float)((int)(k + 0.5));
        if (f == c)
            return buffer->values[(int)k];
        else
        {
            // linear regresion
            d0 = buffer->values[(int)f];
            d1 = buffer->values[(int)c];
            return (d1 - d0) * (k - f) + d0;
        }
    }
    #endif
    
     

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.