[Assorted-commits] SF.net SVN: assorted: [862] tcq-wavelets
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-07-02 06:37:47
|
Revision: 862 http://assorted.svn.sourceforge.net/assorted/?rev=862&view=rev Author: yangzhang Date: 2008-07-01 23:37:56 -0700 (Tue, 01 Jul 2008) Log Message: ----------- added tcq wavelets Added Paths: ----------- tcq-wavelets/ tcq-wavelets/trunk/ tcq-wavelets/trunk/Test/ tcq-wavelets/trunk/Test/Backup/ tcq-wavelets/trunk/Test/Backup/histogram.c tcq-wavelets/trunk/Test/Backup/histogram.h tcq-wavelets/trunk/Test/Backup/pg_proc.h tcq-wavelets/trunk/Test/Backup/pg_type.h tcq-wavelets/trunk/Test/Backup/shedding.c tcq-wavelets/trunk/Test/Backup/shedding.h tcq-wavelets/trunk/Test/Backup/wavelet.c tcq-wavelets/trunk/Test/Backup/wavelet.h tcq-wavelets/trunk/Test/Backup/wrapch.c tcq-wavelets/trunk/Test/Copy (2) of Test.cpp tcq-wavelets/trunk/Test/Copy (2) of wavelet.c tcq-wavelets/trunk/Test/Copy (2) of wavelet.h tcq-wavelets/trunk/Test/Copy (3) of wavelet.c tcq-wavelets/trunk/Test/Copy (3) of wavelet.h tcq-wavelets/trunk/Test/Copy (4) of wavelet.c tcq-wavelets/trunk/Test/Copy (4) of wavelet.h tcq-wavelets/trunk/Test/Copy (5) of wavelet.c tcq-wavelets/trunk/Test/Copy (5) of wavelet.h tcq-wavelets/trunk/Test/Copy (6) of wavelet.c tcq-wavelets/trunk/Test/Copy (6) of wavelet.h tcq-wavelets/trunk/Test/Copy (7) of wavelet.c tcq-wavelets/trunk/Test/Copy (7) of wavelet.h tcq-wavelets/trunk/Test/Copy (8) of wavelet.c tcq-wavelets/trunk/Test/Copy (8) of wavelet.h tcq-wavelets/trunk/Test/Copy of Test.cpp tcq-wavelets/trunk/Test/Copy of wavelet.c tcq-wavelets/trunk/Test/Copy of wavelet.h tcq-wavelets/trunk/Test/Test.ncb tcq-wavelets/trunk/Test/Test.sln tcq-wavelets/trunk/Test/Test.suo tcq-wavelets/trunk/Test/Test.vcproj tcq-wavelets/trunk/Test/stdafx.cpp tcq-wavelets/trunk/Test/stdafx.h tcq-wavelets/trunk/Test/wavelet.c tcq-wavelets/trunk/Test/wavelet.h Added: tcq-wavelets/trunk/Test/Backup/histogram.c =================================================================== --- tcq-wavelets/trunk/Test/Backup/histogram.c (rev 0) +++ tcq-wavelets/trunk/Test/Backup/histogram.c 2008-07-02 
06:37:56 UTC (rev 862) @@ -0,0 +1,3741 @@ +/* + * PostgreSQL type definitions for built-in histogram data types. + * + * $Header: /project/eecs/db/cvsroot/postgres/src/backend/telegraphcq/histogram.c,v 1.23 2004/05/14 21:29:14 yangsta Exp $ + */ + +#include <ctype.h> +#include <sys/time.h> // yang + + /* For toupper(3) */ + +#include "postgres.h" + +#include "access/hash.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "utils/builtins.h" +#include "utils/elog.h" +#include "utils/relcache.h" + +#include "utils/lsyscache.h" + /* For get_typlenbyval() function. */ + + +#include "telegraphcq/shedding.h" +#include "telegraphcq/histogram.h" + + +#define ENABLE_STREAM_STATE_CHECKS + +/******************************************************************************* + * INTERNAL DATA STRUCTURES + ******************************************************************************/ + +/* Used for passing maxdiff information back. + * + * In particular, get_maxdiffs() returns a pointer to an array of these, one per + * bucket. */ +typedef struct bucket_maxdiff_info { + int abs; + /* The absolute difference between two adjacent tuples when sorted + * along a particular dimension. */ + + int dimid; + /* The dimension along which this difference occurs. */ + + double norm; + /* Normalized to the min. and max. values along this dimension. 
*/ + +} bucket_maxdiff_info; + + +/******************************************************************************* + * PROTOTYPES FOR INTERNAL FUNCTIONS + ******************************************************************************/ + +static mhist * real_mhist_in(char *relname, + char **cols, + int ncols, + int num_buckets); + +static void mhist_set_nameinfo(mhist *hist, char *relname, + char **cols, int ncols); + +static HeapTuple *get_table_as_array(char *relname, char **cols, int ncols, + int *ntuples, TupleDesc *descdest); + +static bool split_a_bucket(HeapTuple *giant_tup_array, int ntuples, + TupleDesc desc, + int *bucket_offsets, int *bucket_lengths, mhist *hist, + mhist_dim_info *ranges, int nbuckets_done); + +static void sort_tuples_along_dim(HeapTuple *tuples, int length, int dimid, + TupleDesc desc); + +static void load_mhist(char *relname, char **cols, mhist *hist); + +static mhist * create_mhist_copy(const mhist *orig); +static mhist * create_bigger_mhist(mhist *hist); + +mhist * create_proj_mhist(const mhist * original, int dim); + +void srf_firstcall_junk(FuncCallContext *funcctx, void *state, int max_calls, + char *typename); + +void srf_cleanup(FuncCallContext *funcctx); + +mhist_dim_info * get_ranges(HeapTuple *giant_tup_array, int ntuples, + TupleDesc desc, int ndims); + +bucket_maxdiff_info *get_maxdiffs( + HeapTuple *giant_tup_array, + TupleDesc desc, + const int *bucket_offsets, + const int *bucket_lengths, + int nbuckets_done, + const mhist_dim_info *ranges); + +int get_mhist_bucket_for_tuple(mhist *hist, HeapTuple tuple, TupleDesc desc); + +mhist * real_mhist_project(mhist *inhist, int32 proj_dim); + +void project_mhist_bucket(mhist_bucket *src, mhist_bucket *dest, + int ndims, int proj_dim); + +void remove_empty_mhist_buckets(mhist *hist); + +mhist *merge_overlapping_mhist_buckets(mhist *hist); + +mhist *really_merge_overlapping_mhist_buckets(mhist *hist, bool *found_overlap, + bool verbose); + +mhist *merge_two_overlapping_buckets(mhist 
*hist, + int left_bucketid, int right_bucketid, bool verbose); + +void cut_top_off_mhist_bucket(mhist_bucket *bucket_ptr, int dimid, int new_max); + +void cut_bottom_off_mhist_bucket(mhist_bucket *bucket_ptr, int dimid, + int new_min); + +bool potential_overlap(mhist_bucket *left_bucket, mhist_bucket *right_bucket, + int ndims); + +double mhist_total_count(mhist *hist); + +char *mhist_to_str(mhist *hist); + +mhist * mhist_cross_product(const mhist *hist1, const mhist *hist2); + +void mhist_equality_select_in_place(mhist *hist, int dim1, int dim2); + +void check_mhist_integrity(const mhist *hist, const char *id); + +void get_mhist_split_point(HeapTuple *giant_tup_array, TupleDesc desc, + const int *bucket_offsets, + const int *bucket_lengths, + int nbuckets_done, + const mhist_dim_info *ranges, + int *split_bucketid, int *split_dimid, int *split_offset); + +bool no_buckets_to_split(HeapTuple *giant_tup_array, + const int *bucket_offsets, + const int *bucket_lengths, + int nbuckets_done, + TupleDesc desc); + +void mhist_stream_load_buckets(mhist_stream_state *state); + +void mhist_create_buckets(mhist *hist, HeapTuple *tuparr, int ntups, + TupleDesc desc); + +void tdesc_to_mhist_diminfo(mhist *hist, TupleDesc desc); + +mhist * create_reg_mhist(HeapTuple *tups, int ntups, + TupleDesc desc, int bucket_width); + +int create_reg_buckets(mhist *hist, mhist_dim_info *ranges, int num_dims, + int cur_dim, int bucket_width, int bucket_num, int *itr); + +int remove_duplicate_names(mhist_dim_name *names, int num_names); + +int get_dimid_for_name(mhist_dim_name *names, int num_names, + const char *name); + +int get_random(HeapTuple*, int, int, int, int, double); + +void create_prob_array(double*, double, int); + +int skip_random(double*, int); + +void print_array(HeapTuple*, int); + +/******************************************************************************* + * PROFILING + ******************************************************************************/ + +int asdf(char *); 
+int zxcv(char *); + +int asdf(char *str) { +#ifdef ASDF + struct timeval tv; + gettimeofday(&tv, 0); + elog(LOG, "YANG START %s %d.%0.6d", str, tv.tv_sec, tv.tv_usec); +#endif + return 0; +} + +int zxcv(char *str) { +#ifdef ZXCV + struct timeval tv; + gettimeofday(&tv, 0); + elog(LOG, "YANG END %s %d.%0.6d", str, tv.tv_sec, tv.tv_usec); +#endif + return 0; +} + +/******************************************************************************* + * EXTERNAL FUNCTIONS + ******************************************************************************/ + +/* FUNCTION create_mhist + * ARGUMENTS: <num_buckets> is the number of buckets for a new histogram, + * and <num_dims> is the number of dimensions of the histogram. + * POSTCONDITIONS: Creates a new, empty histogram data structure and fills in + * the header fields. + */ +mhist * create_mhist(int num_buckets, int num_dims) { + mhist *result = NULL; + int32 hist_len = MHIST_SIZE(num_buckets, num_dims); + + if (hist_len < MHIST_HDRSZ) { + elog(ERROR, "Invalid MHIST length of %d.", hist_len); + } + + result = (mhist*)palloc(hist_len); + + /* + elog(LOG, "create_mhist(): Created a structure of size %d at %p.", + hist_len, result); + elog(LOG, "create_mhist(): Last byte at %p.", + ((char*)result + hist_len) ); + */ + + /* Zero out the struct so as not to screw up Postgres hash functions. */ + memset(result, 0x0, hist_len); + + result->hdr.length = hist_len; + result->hdr.numDims = num_dims; + result->hdr.numBucketsUsed = result->hdr.numBucketsAllocated = num_buckets; + + return result; +} + +/* FUNCTION mhist_load_buckets() + * ARGUMENTS: <giant_tup_array> is an array containing a bunch of tuples. + * <ntuples> is the number of tuples in the array. + * <hist> is an MHIST. + * PRECONDITIONS: <hist> has not been loaded yet, but its bucket boundaries + * have already been determined. + * POSTCONDITIONS: Fills up the buckets array of <hist> with information about + * the indicated buckets. 
+ */ +void mhist_load_buckets(HeapTuple *giant_tup_array, TupleDesc desc, + int ntuples, mhist *hist) +{ + asdf("mhist_load_buckets"); + int tupleid = -1; + + /* SPECIAL CASE: No tuples to load. */ + if (0 == ntuples) { + return; + } + /* END SPECIAL CASE */ + + for (tupleid = 0; tupleid < ntuples; tupleid++) { + + /* Figure out what bucket this tuple should go into. */ + int bucketid = get_mhist_bucket_for_tuple(hist, + giant_tup_array[tupleid], desc); + + mhist_bucket *target_bucket_ptr = MHIST_BUCKET_PTR(hist, bucketid); + + /* Increment the counters. */ + /* TODO: Calculate the number of unique values along each dimension! */ + target_bucket_ptr->count += 1.0; + } + + zxcv("mhist_load_buckets"); +} + + + +/* + * MHist constructor. Takes a single string argument, in the following format: + * + * (<Relation name>),(<col1>,col2>,...,<coln>),(<numbuckets>) + * + * Where: + * <Relation name> is the name of the relation to be histogrammed. + * <col1>,col2>,...,<coln> are the columns of the relation to be + * included in the histogram. + * <numbuckets> is the number of buckets in the histogram. + */ +Datum +mhist_in(PG_FUNCTION_ARGS) +{ + asdf("mhist_in"); + + char *str = PG_GETARG_CSTRING(0); + char *str_copy = NULL; + char *relname_str = NULL, *cols_str = NULL, *num_buckets_str = NULL; + char **cols_arr = NULL; + int ncols = -1, colno = -1; + char *ptr = NULL; + mhist *result = NULL; + + /* TODO: Sould the parsing code below be moved to a single function? */ + + /* Parse a copy of the string, strtok() style. */ + str_copy = palloc(1 + strlen(str)); + strcpy(str_copy, str); + + /* First the relation name, "(name)". */ + if (str_copy[0] != '(') goto parse_error; + relname_str = ptr = str_copy + 1; + while (*ptr != ')') { + if ('\0' == *ptr) goto parse_error; ++ptr; + } + *ptr = '\0'; ++ptr; + + /*elog(LOG, "mhist_in(): Relation string is '%s'.", relname_str);*/ + + /* Skip the comma between the parens. 
*/ + if (*ptr != ',') goto parse_error; ++ptr; + + /* Then parse the column names, "(col1,col2,...,coln)". */ + if (*ptr != '(') goto parse_error; ++ptr; + cols_str = ptr; + while (*ptr != ')') { + if ('\0' == *ptr) goto parse_error; ++ptr; + } + *ptr = '\0'; ++ptr; + + /*elog(LOG, "mhist_in(): Cols string is '%s'.", cols_str);*/ + + /* Skip another comma, and parse the number of buckets. */ + if (*ptr != ',') goto parse_error; ++ptr; + if (*ptr != '(') goto parse_error; ++ptr; + num_buckets_str = ptr; + while (*ptr != ')') { + if ('\0' == *ptr) goto parse_error; ++ptr; + } + *ptr = '\0'; ++ptr; + + /*elog(LOG, "mhist_in(): Num. buckets string is '%s'.", num_buckets_str);*/ + + /* TODO: We currently ignore any junk at the end of the string. Should we + * read through it and complain about non-whitespace characters? */ + + /* Generate an array of column names. */ + /* First we need to count the number of columns. */ + ncols = 1; + ptr = cols_str; + if (ptr == '\0') goto parse_error; + while (*ptr != '\0') { + if (',' == *ptr) ++ncols; + ++ptr; + } + + Assert(ncols <= MHIST_MAX_DIMS); + + + /* Then we generate the array, again by parsing strtok()-style. */ + cols_arr = palloc(ncols * sizeof(char*)); + ptr = cols_str; + for (colno = 0; colno < ncols - 1; colno++) { + cols_arr[colno] = ptr; + while (*ptr != ',') ++ptr; + *ptr = '\0'; ++ptr; + } + cols_arr[ncols-1] = ptr; + + /* Generate the histogram object. */ + result = real_mhist_in(relname_str, cols_arr, ncols, + atoi(num_buckets_str)); + + /* Clean up. */ + /*elog(LOG, "mhist_in(): Cleaning up.");*/ + pfree(str_copy); + pfree(cols_arr); + + zxcv("mhist_in"); + + /* Now we're done. */ + PG_RETURN_MHIST_P(result); + + /* Error handlers. */ +parse_error: + /* elog(ERROR) should clean up everything we palloc()'d. */ + elog(ERROR, "mhist_in(): Error parsing string '%s'.", str); + + zxcv("mhist_in"); + + /* UNREACHABLE, but the compiler might not know that. 
 */
    PG_RETURN_MHIST_P(NULL);
}

/*
 * Constructor that builds an MHIST with square buckets. Takes a single
 * string argument, in the following format:
 *
 * (<Relation name>),(<col1>,col2>,...,<coln>),(<bucketwidth>)
 *
 * Where:
 * <Relation name> is the name of the relation to be histogrammed.
 * <col1>,col2>,...,<coln> are the columns of the relation to be
 * included in the histogram.
 * <bucketwidth> is the width of a single bucket along each dimension.
 */
Datum
square_mhist_in(PG_FUNCTION_ARGS)
{
    asdf("square_mhist_in");

    char *str = PG_GETARG_CSTRING(0);
    char *str_copy = NULL;
    char *relname_str = NULL, *cols_str = NULL,
            *bucketwidth_str = NULL;
    char **cols_arr = NULL;
    int ncols = -1, colno = -1;
    char *ptr = NULL;
    mhist *result = NULL;
    HeapTuple *giant_tup_array = NULL;  /* all tuples of the source relation */
    TupleDesc desc = NULL;              /* descriptor for those tuples */
    int ntuples = -1;

    /* TODO: Should the parsing code below be moved to a single function? */

    /* Parse a copy of the string, strtok() style. */
    str_copy = palloc(1 + strlen(str));
    strcpy(str_copy, str);

    /* First the relation name, "(name)". */
    if (str_copy[0] != '(') goto parse_error;
    relname_str = ptr = str_copy + 1;
    while (*ptr != ')') {
        if ('\0' == *ptr) goto parse_error; ++ptr;
    }
    *ptr = '\0'; ++ptr;

    /*elog(LOG, "mhist_in(): Relation string is '%s'.", relname_str);*/

    /* Skip the comma between the parens. */
    if (*ptr != ',') goto parse_error; ++ptr;

    /* Then parse the column names, "(col1,col2,...,coln)". */
    if (*ptr != '(') goto parse_error; ++ptr;
    cols_str = ptr;
    while (*ptr != ')') {
        if ('\0' == *ptr) goto parse_error; ++ptr;
    }
    *ptr = '\0'; ++ptr;

    /*elog(LOG, "mhist_in(): Cols string is '%s'.", cols_str);*/

    /* Skip another comma, and parse the number of buckets.
*/ + if (*ptr != ',') goto parse_error; ++ptr; + if (*ptr != '(') goto parse_error; ++ptr; + bucketwidth_str = ptr; + while (*ptr != ')') { + if ('\0' == *ptr) goto parse_error; ++ptr; + } + *ptr = '\0'; ++ptr; + + /*elog(LOG, "mhist_in(): Num. buckets string is '%s'.", bucketwidth_str);*/ + + /* TODO: We currently ignore any junk at the end of the string. Should we + * read through it and complain about non-whitespace characters? */ + + /* Generate an array of column names. */ + /* First we need to count the number of columns. */ + ncols = 1; + ptr = cols_str; + if (ptr == '\0') goto parse_error; + while (*ptr != '\0') { + if (',' == *ptr) ++ncols; + ++ptr; + } + + Assert(ncols <= MHIST_MAX_DIMS); + + + /* Then we generate the array, again by parsing strtok()-style. */ + cols_arr = palloc(ncols * sizeof(char*)); + ptr = cols_str; + for (colno = 0; colno < ncols - 1; colno++) { + cols_arr[colno] = ptr; + while (*ptr != ',') ++ptr; + *ptr = '\0'; ++ptr; + } + cols_arr[ncols-1] = ptr; + + /* Read in the tuples for this histogram. */ + giant_tup_array = get_table_as_array(relname_str, cols_arr, ncols, + &ntuples, &desc); + + /* Generate the histogram object. */ + result = create_reg_mhist(giant_tup_array, ntuples, desc, + atoi(bucketwidth_str)); + + /* create_reg_mhist() doesn't know about the relation names, because + * they're not in the tuple descriptor. So we overwrite the dimension name + * information. */ + mhist_set_nameinfo(result, relname_str, cols_arr, ncols); + + /* Fill its buckets. */ + mhist_load_buckets(giant_tup_array, desc, ntuples, result); + + /* Clean up. */ + /*elog(LOG, "mhist_in(): Cleaning up.");*/ + pfree(str_copy); + pfree(cols_arr); + + zxcv("square_mhist_in"); + + /* Now we're done. */ + PG_RETURN_MHIST_P(result); + + /* Error handlers. */ +parse_error: + /* elog(ERROR) should clean up everything we palloc()'d. 
*/ + elog(ERROR, "square_mhist_in(): Error parsing string '%s'.", str); + + zxcv("square_mhist_in"); + + /* UNREACHABLE, but the compiler might not know that. */ + PG_RETURN_MHIST_P(NULL); +} +/* + * MHIST-to-string conversion function (for output). + * + * It's hard to convert a histogram to a one-line string, so this function + * just prints a note to the effect that this is an MHIST. + */ +Datum +mhist_out(PG_FUNCTION_ARGS) +{ + mhist *hist = PG_GETARG_MHIST_P(0); + char *result = mhist_to_str(hist); + + PG_RETURN_CSTRING(result); +} + +/* + * Function to convert an mhist into relational representation. This is a + * set-returning function (See section 9.5.6.2 of the Programmer's Reference), + * so it needs to save its state across multiple calls. Yuck. + * + * Based on hist_to_table(). + * + * In order to call this function, you need to define the following SQL + * datatypes: + * + * CREATE_TYPE __mhist1 AS (bucketid, min0, max0, count); + * CREATE_TYPE __mhist2 AS (bucketid, min0, max0, min1, max1, count); + * CREATE_TYPE __mhist3 AS (bucketid, min0, max0, min1, max1, min2, max2, + * count); + * CREATE_TYPE __mhist4 AS (bucketid, min0, max0, min1, max1, min2, max2, + * min3, max3, count); + * [Add additional types here if you change the value of HISTOGRAM_MAX_DIMS.] + */ +Datum +mhist_to_table(PG_FUNCTION_ARGS) +{ + asdf("mhist_to_table"); + + FuncCallContext *funcctx = NULL; + mhist_to_table_state *state = NULL; + mhist *hist = PG_GETARG_MHIST_P(0); +/* TupleDesc tupdesc;*/ + char typename[128]; + + check_mhist_integrity(hist, "mhist_to_table()"); + + if (SRF_IS_FIRSTCALL()) { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + + /*elog(LOG, "mhist_to_table(): Starting first-call setup.");*/ + + + /* Create the data structure that will remember the status of this + * function. 
 */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
        state = palloc(sizeof(*state));
        MemoryContextSwitchTo(oldcontext);

        state->current_bucket = 0;
        state->num_buckets = MHIST_NUM_BUCKETS_USED(hist);

        /* Generate the name of the type that corresponds to the histogram's
         * number of dimensions. */
        snprintf(typename, sizeof(typename), "__mhist%d", hist->hdr.numDims);

        /* Postgres SRF crap has been moved to this function: */
        srf_firstcall_junk(funcctx, state,
                MHIST_NUM_BUCKETS_USED(hist), typename);

    }

    /* Stuff that gets done every time the function is called: */
    funcctx = SRF_PERCALL_SETUP();

    state = (mhist_to_table_state*)(funcctx->user_fctx);

    /* Are we done returning tuples? */
    if (state->current_bucket < state->num_buckets) {
        char **values;
        int numvalues = -1;
        HeapTuple tuple;
        Datum result;
        int valid = -1;   /* index into values[], not a boolean */

        /* Not done returning tuples -- construct the next one. */
        /* For the sake of simplicity, we convert the field values into strings
         * and then back into values. */

        numvalues = 2 * hist->hdr.numDims + 2;
        /* Number of items in row == 2 + (2 x number of dimensions):
         * 1 column for the bucketID (not strictly necessary)
         * 1 column for the bucket's count
         * 2 columns per dimension to hold min and max values.
         */

        /* Build up the array of string values. */
        values = (char**)palloc(sizeof(char*) * (numvalues));

        for (valid = 0; valid < numvalues; valid++) {
            values[valid] = (char*)palloc(32 * sizeof(char));
        }

        /* Fill in the array. */
        for (valid = 0; valid < numvalues; valid++) {
            if (0 == valid) {
                /* First entry is the bucketid.
 */
                snprintf(values[valid], 32, "%d", state->current_bucket);

            } else if (numvalues - 1 == valid) {
                /* Last entry is the bucket count. */
                mhist_bucket *cur_bucket =
                        MHIST_BUCKET_PTR(hist, state->current_bucket);
                /* Round up if necessary */
                snprintf(values[valid], 32, "%f",
                        (cur_bucket->count));

            } else {
                /* valid > 0 and < (numvalues-1) */
                /* Entries between first and last are the bounds of the
                 * rectangle that comprises this bucket. These entries
                 * alternate between the lower and upper bound. */
                int dimid = (valid - 1) / 2;
                bool is_min = (valid % 2 == 1);
                /* Is this a lower or upper bound? */
                mhist_bucket *cur_bucket =
                        MHIST_BUCKET_PTR(hist, state->current_bucket);
                snprintf(values[valid], 32, "%d",
                        is_min ?
                        cur_bucket->diminfo[dimid].min_val
                        : cur_bucket->diminfo[dimid].max_val);
            }

        }

        /* Construct the tuple for the strings. */
        tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);

        /* Make the tuple into a datum. */
        result = TupleGetDatum(funcctx->slot, tuple);

        /* Clean up.
*/ + for (valid = 0; valid < hist->hdr.numDims; valid++) { + /*elog(LOG, "mhist_to_table(): Freeing field %d of a tuple.", + valid);*/ + pfree(values[valid]); + } + /*elog(LOG, "mhist_to_table(): Freeing holder for tuple %d.", + state->current_bucket);*/ + pfree(values); + + /*elog(LOG, "mhist_to_table(): Done generating tuple for bucket %d.", + state->current_bucket);*/ + + ++(state->current_bucket); + + zxcv("mhist_to_table"); + + SRF_RETURN_NEXT(funcctx, result); + } else { + /*elog(LOG, "mhist_to_table(): Cleaning up.");*/ + + /* here we are done returning items, and just need to clean up: */ + srf_cleanup(funcctx); + + zxcv("mhist_to_table"); + + SRF_RETURN_DONE(funcctx); + } +} + + +/* + * Function that removes a dimension of an MHIST, merging all bins along the + * dimension. + * + * Usage: mhist_project(hist, dim) + * + * Where: + * <hist> is a histogram created with mhist_in(). + * <dim> is the name of the dimension to eliminate. + * + * This function creates and returns a new MHIST. + */ +Datum +mhist_project(PG_FUNCTION_ARGS) { + asdf("mhist_project"); + + mhist *inhist = PG_GETARG_MHIST_P(0); + char *proj_dim_name = PG_GETARG_CSTRING(1); + + mhist *result = NULL; + + /* Figure out which index to project. */ + int proj_dim = get_dimid_for_name(inhist->hdr.dimNames, + inhist->hdr.numDimNames, + proj_dim_name); + + if (MHIST_NOT_A_DIMID == proj_dim) { + elog(ERROR, "mhist_project(): Dimension '%s' of '%s' not found.", + proj_dim_name, mhist_to_str(inhist)); + } + + result = real_mhist_project(inhist, proj_dim); + + zxcv("mhist_project"); + + PG_RETURN_MHIST_P(result); +} + +/* + * Function that removes all dimensions of an MHIST except for the dimensions + * corresponding to the specified columns. + * + * Usage: mhist_project_many(hist, dimlist) + * + * Where: + * <hist> is a histogram created with mhist_in(). + * <dimlist> is a string containing the names of the dimensions to + * keep, separated by spaces. 
+ * + * This function creates and returns a new MHIST. + */ +Datum +mhist_project_many(PG_FUNCTION_ARGS) { + asdf("mhist_project"); + + mhist *inhist = PG_GETARG_MHIST_P(0); + char *dimlist = PG_GETARG_CSTRING(1); + int i = -1; + int num_cols_kept = -1; + int dimid = -1; + char dimlist_copy[512]; + char *cols_kept[MHIST_MAX_DIMS]; + mhist *curhist = NULL; + bool dims_to_keep[MHIST_MAX_DIMS]; + bool first_copy = false; + int num_to_keep = -1; + + + /* Parse the list of column names. */ + Assert(dimlist != NULL); + + /* In one pass, we make an uppercase copy, count the number of columns, + * and make an array of pointers to their names. */ + num_cols_kept = 1; + cols_kept[0] = dimlist_copy; + for (i = 0; dimlist[i] != '\0' && i < sizeof(dimlist_copy); i++) { + if (' ' == dimlist[i]) { + /* Names are separated by a single space. */ + dimlist_copy[i] = '\0'; + cols_kept[num_cols_kept] = dimlist_copy + (i+1); + num_cols_kept++; + Assert(num_cols_kept < MHIST_MAX_DIMS); + } else { + dimlist_copy[i] = toupper(dimlist[i]); + } + } + dimlist_copy[i] = '\0'; + + elog(LOG, "mhist_project_many(): Keeping the following cols:"); + for (i = 0; i < num_cols_kept; i++) { + elog(LOG, " '%s'", cols_kept[i]); + } + + /* Now we go through each column and see if it is one of the columns we + * want to keep; if it is not, we will project it out. */ + curhist = inhist; + for (dimid = 0; dimid < MHIST_NUM_DIMS(curhist); dimid++) { + int colid = -1; + + dims_to_keep[dimid] = false; + /* Start out assuming we're going to throw away this column. */ + + for (colid = 0; colid < num_cols_kept; colid++) { + int nameid = -1; + for (nameid = 0; nameid < MHIST_NUM_DIM_NAMES(curhist); nameid++) + { + elog(LOG, "Comparing '%s' with '%s'", + cols_kept[colid], MHIST_DIM_NAME(curhist, nameid)); + if (MHIST_NAME_DIM(curhist, nameid) == dimid + && 0 == strncasecmp(cols_kept[colid], + MHIST_DIM_NAME(curhist, nameid), + MHIST_COL_NAME_LEN)) + { + dims_to_keep[dimid] = true; + /* Want to keep this column. 
 */
                    elog(LOG, " Found a match!");
                }
            }
        }
    }

    /* Make sure we got the right number of columns. */
    num_to_keep = 0;
    for (dimid = 0; dimid < MHIST_NUM_DIMS(curhist); dimid++) {
        if (dims_to_keep[dimid]) {
            num_to_keep++;
        }
    }

    if (num_to_keep != num_cols_kept) {
        elog(ERROR, "Only found %d of %d columns '%s'.",
                num_to_keep, num_cols_kept, dimlist);
    }

    /* For now, we project dimensions out one by one, since that code is
     * already written. We do the dimensions in reverse order since projection
     * causes the dimensions above the one projected to be renumbered. */
    /* TODO: Perform all the projections in one step. */
    first_copy = true;
    for (dimid = MHIST_NUM_DIMS(curhist) - 1; dimid >= 0; dimid--) {
        if (false == dims_to_keep[dimid]) {
            mhist *newhist = real_mhist_project(curhist, dimid);

            /* Don't want to pfree() the function argument, inhist. */
            if (first_copy) {
                first_copy = false;
            } else {
                pfree(curhist);
            }
            curhist = newhist;
        }
    }

    zxcv("mhist_project");

    PG_RETURN_MHIST_P(curhist);
}




/*
 * Function that adds the corresponding buckets of two mhists.
 *
 * Usage: mhist_union(hist1, hist2)
 *
 * Where:
 * <hist1> and <hist2> are MHISTs created with mhist_in().
 *
 * The dimension names for the output histogram will be taken from the
 * union of the two histograms.
 */
Datum
mhist_union(PG_FUNCTION_ARGS) {
    asdf("mhist_union");

    mhist *hist1 = PG_GETARG_MHIST_P(0);
    mhist *hist2 = PG_GETARG_MHIST_P(1);
    mhist *result = NULL;
    int num_buckets = -1;
    int bucketid = -1;
    int nameid = -1;
    int num_names = -1;
    /* Scratch space: big enough for both inputs' name lists before dedup. */
    mhist_dim_name new_names[2 * MHIST_MAX_COL_NAMES];


    /* Verify the integrity of the inputs.
 */
    check_mhist_integrity(hist1, "mhist_union() hist1");
    check_mhist_integrity(hist2, "mhist_union() hist2");
    if (MHIST_NUM_DIMS(hist1) != MHIST_NUM_DIMS(hist2)) {
        elog(ERROR,
                "mhist_union(): Histograms have different numbers of dimensions.");
    }

    elog(DEBUG1, "mhist_union(): Left arg is (%s).",
            mhist_to_str(hist1));
    elog(DEBUG1, "mhist_union(): Right arg is (%s).",
            mhist_to_str(hist2));

    /* Start out with the buckets of the original histograms. */
    num_buckets = MHIST_NUM_BUCKETS_USED(hist1)
            + MHIST_NUM_BUCKETS_USED(hist2);

    /* Allocate a new histogram containing all the buckets from the input
     * histograms. */
    result = create_mhist(num_buckets, MHIST_NUM_DIMS(hist1));

    /* Copy over the dimension names, and remove duplicates. */
    for (nameid = 0; nameid < MHIST_NUM_DIM_NAMES(hist1); nameid++) {
        new_names[nameid] = MHIST_DIM_NAME_INFO(hist1, nameid);
    }

    for (nameid = 0; nameid < MHIST_NUM_DIM_NAMES(hist2); nameid++) {
        new_names[nameid + MHIST_NUM_DIM_NAMES(hist1)] =
                MHIST_DIM_NAME_INFO(hist2, nameid);
    }

    num_names = remove_duplicate_names(new_names,
            MHIST_NUM_DIM_NAMES(hist1) + MHIST_NUM_DIM_NAMES(hist2));

    if (num_names > MHIST_MAX_COL_NAMES) {
        elog(ERROR, "mhist_union(): Too many dimension names.");
    }

    memcpy(result->hdr.dimNames, new_names, num_names * sizeof(*new_names));

    MHIST_SET_NUM_DIM_NAMES(result, num_names);

    /* Copy the raw buckets of both inputs into the result; overlaps between
     * the two sets are resolved by the merge step below. */
    for (bucketid = 0; bucketid < MHIST_NUM_BUCKETS_USED(hist1); bucketid++) {
        memcpy(MHIST_BUCKET_PTR(result, bucketid),
                MHIST_BUCKET_PTR(hist1, bucketid),
                MHIST_BUCKET_LEN(MHIST_NUM_DIMS(hist1)));
    }

    for (bucketid = 0; bucketid < MHIST_NUM_BUCKETS_USED(hist2); bucketid++) {
        memcpy(MHIST_BUCKET_PTR(result,
                bucketid + MHIST_NUM_BUCKETS_USED(hist1)),
                MHIST_BUCKET_PTR(hist2, bucketid),
                MHIST_BUCKET_LEN(MHIST_NUM_DIMS(hist1)));
    }

    /* Fix overlapping buckets.
 */
    result = merge_overlapping_mhist_buckets(result);

    zxcv("mhist_union");

    PG_RETURN_MHIST_P(result);
}

/*
 * Function that removes a dimension of an MHIST, keeping only those bins
 * for which the bin along the indicated dimension was a particular number.
 *
 * Usage: mhist_slice(hist, dim, val)
 *
 * Where:
 * <hist> is a histogram created with mhist_in().
 * <dim> is the name of the dimension to eliminate.
 * <val> is the value to keep along that dimension.
 *
 * This function creates and returns a new MHIST.
 */
Datum
mhist_slice(PG_FUNCTION_ARGS) {
    asdf("mhist_slice");

    mhist *hist = PG_GETARG_MHIST_P(0);
    char *dim_name = PG_GETARG_CSTRING(1);
    int32 val = PG_GETARG_INT32(2);
    mhist *result = NULL;
    int num_result_buckets = -1;
    int bucketid = -1;

    int32 dim = -1;

    /* Verify the integrity of the inputs. */
    check_mhist_integrity(hist, "mhist_slice() argument");

    dim = get_dimid_for_name(hist->hdr.dimNames,
            MHIST_NUM_DIM_NAMES(hist), dim_name);

    if (MHIST_NOT_A_DIMID == dim) {
        elog(ERROR, "mhist_slice(): Dimension '%s' of '%s' not found.",
                dim_name, mhist_to_str(hist));
    }

    /* TODO: Right now, we accept any value. Is that okay? */

    /* Create the empty output histogram. */
    result = create_proj_mhist(hist, dim);

    /* Project the buckets and adjust their counts. While we're at it, remove
     * buckets that don't overlap with the indicated slice. */
    num_result_buckets = 0;
    for (bucketid = 0; bucketid < MHIST_NUM_BUCKETS_USED(hist); bucketid++)
    {
        int min_val = MHIST_DIMINFO_PTR(hist, bucketid, dim)->min_val;
        int max_val = MHIST_DIMINFO_PTR(hist, bucketid, dim)->max_val;

        /* Should we copy this bucket over? */
        if (val >= min_val && val <= max_val) {
            mhist_bucket *bkt_ptr = NULL;

            project_mhist_bucket(
                    MHIST_BUCKET_PTR(hist, bucketid),
                    MHIST_BUCKET_PTR(result, num_result_buckets),
                    MHIST_NUM_DIMS(hist), dim);

            /* Adjust the count of the bucket.
*/ + bkt_ptr = MHIST_BUCKET_PTR(result, num_result_buckets); + + /* We're taking a slice that is one unit wide. */ + bkt_ptr->count /= (max_val - min_val + 1.0); + + num_result_buckets++; + } + } + + Assert(MHIST_NUM_BUCKETS_ALLOC(result) >= num_result_buckets); + + MHIST_SET_NUM_BUCKETS_USED(result, num_result_buckets); + + zxcv("mhist_slice"); + + PG_RETURN_MHIST_P(result); +} + +/* + * Function that performs the approximate equijoin of two relations using + * MHISTs. + * + * Usage: mhist_equijoin(hist1, col1, hist2, col2) + * + * Where: + * <hist1> and <hist2> are MHISTs created with mhist_in(). + * <col1> and <col2> are the names of columns for the MHISTs. + */ +Datum +mhist_equijoin(PG_FUNCTION_ARGS) +{ + asdf("mhist_equijoin"); + + /* Function arguments. */ + mhist *hist1 = PG_GETARG_MHIST_P(0); + char* colname1 = PG_GETARG_CSTRING(1); + mhist *hist2 = PG_GETARG_MHIST_P(2); + char* colname2 = PG_GETARG_CSTRING(3); + + /* Locals */ + mhist *xp = NULL; + mhist *result = NULL; + int xp_dim1 = -1; + int xp_dim2 = -1; + int col1 = -1; + int col2 = -1; + int nameid = -1; + + /* Check validity of arguments. */ + check_mhist_integrity(hist1, "mhist_equijoin() hist1"); + check_mhist_integrity(hist2, "mhist_equijoin() hist2"); + + col1 = get_dimid_for_name(hist1->hdr.dimNames, + MHIST_MAX_COL_NAMES, colname1); + col2 = get_dimid_for_name(hist2->hdr.dimNames, + MHIST_MAX_COL_NAMES, colname2); + + if (MHIST_NOT_A_DIMID == col1) { + elog(ERROR, "mhist_equijoin(): Dimension '%s' of '%s' not found.", + colname1, mhist_to_str(hist1)); + } + if (MHIST_NOT_A_DIMID == col2) { + elog(ERROR, "mhist_equijoin(): Dimension '%s' of '%s' not found.", + colname2, mhist_to_str(hist2)); + } + + + /* First, compute the cross-product of the two histograms. Using the + * cross-product isn't terribly space-efficient, but it makes + * correctness easier. + * + * TODO: Compute the join directly. 
+ */ + xp = mhist_cross_product(hist1, hist2); + /*check_mhist_integrity(xp, "mhist_equijoin() xp");*/ + + + elog(LOG, "mhist_equijoin(): Starting."); + elog(LOG, "mhist_equijoin(): Left arg is (%s).", + mhist_to_str(hist1)); + elog(LOG, "mhist_equijoin(): Right arg is (%s).", + mhist_to_str(hist2)); + elog(LOG, "mhist_equijoin(): Cross product is (%s).", + mhist_to_str(xp)); + + /* Map our dimensions into the dimensions of the cross-product histogram. + */ + xp_dim1 = col1; + xp_dim2 = MHIST_NUM_DIMS(hist1) + col2; + + /* Next, perform the selection. */ + mhist_equality_select_in_place(xp, xp_dim1, xp_dim2);; + + /*check_mhist_integrity(xp, "mhist_equijoin() done selection");*/ + + /* Eliminate zero-size buckets. */ + remove_empty_mhist_buckets(xp); + + /*check_mhist_integrity(xp, "mhist_equijoin() removed empty");*/ + + /* Finally, project the resulting histogram so we have only one copy of the + * join column. */ + result = real_mhist_project(xp, xp_dim2); + /*check_mhist_integrity(xp, "mhist_equijoin() result");*/ + + /* The projection removed the names of the second column in the equijoin, + * but we want the names transferred to the first column. Fix up the names + * array. */ + for (nameid = 0; nameid < MHIST_NUM_DIM_NAMES(hist2); nameid++) { + if (MHIST_NAME_DIM(hist2, nameid) == col2) { + int new_id = MHIST_NUM_DIM_NAMES(result); + if (new_id >= MHIST_MAX_COL_NAMES) { + elog(ERROR, + "Too many column names (%d >= %d) creating equijoin.", + new_id, MHIST_MAX_COL_NAMES); + } + result->hdr.dimNames[new_id] = hist2->hdr.dimNames[nameid]; + MHIST_SET_NAME_DIM(result, new_id, xp_dim1); + MHIST_SET_NUM_DIM_NAMES(result, MHIST_NUM_DIM_NAMES(result) + 1); + } + } + + + pfree(xp); + + zxcv("mhist_equijoin"); + + /* TODO: Should we free hist1 and hist2? */ + PG_RETURN_MHIST_P(result); +} + +/* + * Function that performs the approximate cross-product of two relations using + * MHISTs. 
+ * + * Usage: mhist_crossprod(hist1, hist2) + * + * Where: + * <hist1> and <hist2> are MHISTs created with mhist_in(). + */ +Datum +mhist_crossprod(PG_FUNCTION_ARGS) +{ + asdf("mhist_crossprod"); + + /* Function arguments. */ + mhist *hist1 = PG_GETARG_MHIST_P(0); + mhist *hist2 = PG_GETARG_MHIST_P(1); + + /* Locals */ + mhist *xp = NULL; + + /* Check validity of arguments. */ + check_mhist_integrity(hist1, "mhist_crossprod() hist1"); + check_mhist_integrity(hist2, "mhist_crossprod() hist2"); + + xp = mhist_cross_product(hist1, hist2); + + zxcv("mhist_crossprod"); + + /* TODO: Should we free hist1 and hist2? */ + PG_RETURN_MHIST_P(xp); +} + +/* + * Function that performs the approximate equality selection on an MHIST. + * + * Usage: mhist_eq_selection(hist, col1, col2) + * + * Where: + * <hist> is an MHIST created with mhist_in(). + * <col1> and <col2> are the names of columns stored inside <hist>. + */ +Datum +mhist_eq_selection(PG_FUNCTION_ARGS) +{ + asdf("mhist_eq_selection"); + + /* Function arguments. */ + mhist *hist = PG_GETARG_MHIST_P(0); + char* colname1 = PG_GETARG_CSTRING(1); + char* colname2 = PG_GETARG_CSTRING(2); + + /* Locals */ + mhist *result = NULL; + int dim1 = -1, dim2 = -1; + + /* Check validity of arguments. */ + check_mhist_integrity(hist, "mhist_eq_selection()"); + + dim1 = get_dimid_for_name(hist->hdr.dimNames, + MHIST_MAX_COL_NAMES, colname1); + dim2 = get_dimid_for_name(hist->hdr.dimNames, + MHIST_MAX_COL_NAMES, colname2); + + if (MHIST_NOT_A_DIMID == dim1) { + elog(ERROR, "mhist_eq_selection(): Dimension '%s' of '%s' not found.", + colname1, mhist_to_str(hist)); + } + if (MHIST_NOT_A_DIMID == dim2) { + elog(ERROR, "mhist_eq_selection(): Dimension '%s' of '%s' not found.", + colname2, mhist_to_str(hist)); + } + + result = create_mhist_copy(hist); + + mhist_equality_select_in_place(result, dim1, dim2); + + /* Eliminate zero-size buckets. 
*/ + remove_empty_mhist_buckets(result); + + zxcv("mhist_eq_selection"); + + PG_RETURN_MHIST_P(result); +} + +/* FUNCTION mhist_can_summarize_tuple + * ARGUMENTS: <desc> describes a type of tuple. + * POSTCONDITIONS: Returns TRUE if the current implementation of MHISTs can + * summarize the indicated type of tuple. + */ +bool mhist_can_summarize_tuple(TupleDesc desc) { + int i = -1; + int ncols = get_num_non_timestamp_attrs(desc); + + asdf("mhist_can_summarize_tuple"); + + for (i = 0; i < ncols; i++) { + + /* Skip the timestamp, if any. */ + int attrid = get_attr_id_skipping_ts(i + 1, desc); + + + /* Is this attribute an integer? */ + switch (desc->attrs[attrid - 1]->atttypid) { + case INT2OID: + case INT4OID: + case CHAROID: + case BYTEAOID: + /* We can insert values from this index into the MHIST. */ + break; + + default: + elog(LOG, "Can't build an MHIST on columns of type '%s'", + format_type_with_typemod(desc->attrs[i]->atttypid, + desc->attrs[i]->atttypmod)); + zxcv("mhist_can_summarize_tuple"); + return false; + } + } + + zxcv("mhist_can_summarize_tuple"); + + /* Haven't found any indication that we can't summarize this tuple... */ + return true; +} + +/* FUNCTION get_timestamp_col + * ARGUMENTS: <desc> is a tuple descriptor. + * PRECONDITIONS: <desc> represents a stream. + * POSTCONDITIONS: Returns the attrid of the timestamp attribute for the + * stream. + */ +int get_timestamp_col(TupleDesc desc) +{ + Assert(desc != NULL); + + if (NULL == desc->constr) { + /* No constraints, so no timestamp. */ + return TCQTSATTRNOP; + } + + /* Someone decided to make ts_attrnum 0-based, rather than 1-based. Rather + * than correct that mistake, we add 1 to convert to the actual attribute + * number. */ + /* TODO: Fix the original problem. */ + return 1 + (desc->constr->ts_attrnum); +} + +/* FUNCTION get_num_non_timestamp_attrs + * ARGUMENTS: <desc> points to a tuple descriptor. + * POSTCONDITIONS: Returns the number of attributes of <desc> that are not + * timestamps. 
+ */ +int get_num_non_timestamp_attrs(TupleDesc desc) +{ + int ts_col = -1; + Assert(desc != NULL); + + ts_col = get_timestamp_col(desc); + + if (ts_col != TCQTSATTRNOP) { + /* Have a timestamp column. */ + return desc->natts - 1; + } else { + /* No timestamp. */ + return desc->natts; + } + +} + +/* FUNCTION get_attr_id_skipping_ts + * ARGUMENTS: <id> is an attribute number and <desc> is a tuple descriptor. + * PRECONDITIONS: <id> is >= 1 and <= the number of non-TCQTIME attributes in + * <desc>. + * POSTCONDITIONS: Returns the attribute number of the <id>th attribute that is + * not a timestamp. + */ +int get_attr_id_skipping_ts(int id, TupleDesc desc) +{ + int ts_col = -1; + Assert(desc != NULL); + Assert(id >= 1 && id <= get_num_non_timestamp_attrs(desc)); + + ts_col = get_timestamp_col(desc); + + if (ts_col != TCQTSATTRNOP && id >= ts_col) { + return id + 1; + } else { + return id; + } +} + +/* FUNCTION get_int_field_val + * ARGUMENTS: <tup> is a tuple described by <desc>, and <fnum> is a field + * number for <tup>. <skip_ts> is true if you wish to skip the timestamp + * field of the tuple. + * PRECONDITIONS: <fnum> is 1-based, and the indicated field is not NULL. + * POSTCONDITIONS: Fetches the indicated field of the tuple, converts it into + * 32-bit integer, and returns it. + */ +int64 get_int_field_val(HeapTuple tup, TupleDesc desc, int fnum, bool skip_ts) { + Datum d; + int64 val = -1; + bool is_null = false; + Oid typoid; + int16 typlen = -1; + bool typbyval = false; + + Assert(tup != NULL); + Assert(desc != NULL); + + /* We skip over the timestamp column, if desired. */ + if (skip_ts) { + fnum = get_attr_id_skipping_ts(fnum, desc); + } + + /* Fetch the value of the tuple along this dimension. */ + /* TODO: Should we be using fastgetattr()? */ + d = heap_getattr(tup, fnum, desc, &is_null); + + Assert(!is_null); + + /* There are three kinds of integer, each of which has a different + * method of extracting from a Datum. 
Figure out which one to use + * here. */ + typoid = desc->attrs[fnum - 1]->atttypid; + + get_typlenbyval(typoid, &typlen, &typbyval); + + /*elog(LOG, "get_int_field_val(): Length of integer is %d.", typlen);*/ + + switch(typlen) { + case 1: + /* 8-bit integer... */ + val = DatumGetUInt8(d); + break; + + case 2: + val = DatumGetInt16(d); + break; + + case 4: + val = DatumGetInt32(d); + break; + + case 8: + val = DatumGetInt64(d); + break; + + default: + elog(ERROR, "get_int_field_val(): Got invalid field length %d.", + typlen); + break; + } + + return val; +} + + + +/******************************************************************************* + * INTERNAL FUNCTIONS + ******************************************************************************/ + +/* FUNCTION create_mhist_copy + * ARGUMENTS: <hist> is an MHIST. + * PRECONDITIONS: <hist> is properly initialized. + * POSTCONDITIONS: Creates a copy of <hist>. + */ +static mhist * create_mhist_copy(const mhist *orig) { + mhist *result = NULL; + int len = MHIST_SIZE(MHIST_NUM_BUCKETS_ALLOC(orig), MHIST_NUM_DIMS(orig)); + + result = palloc(len); + + /* Since there are no pointers stored in the MHIST currently, we can just + * do a byte-by-byte copy. */ + memcpy(result, orig, len); + + check_mhist_integrity(result, "create_mhist_copy()"); + + return result; +} + +/* FUNCTION create_bigger_mhist + * ARGUMENTS: <hist> is an MHIST. + * PRECONDITIONS: <hist>'s numBucketsUsed header field is set correctly. + * POSTCONDITIONS: Creates a copy of <hist> with some empty space for new + * buckets. + */ +static mhist *create_bigger_mhist(mhist *hist) { + /* Double the size of the buckets array. */ + mhist *ret = create_mhist(2 * MHIST_NUM_BUCKETS_ALLOC(hist), + MHIST_NUM_DIMS(hist)); + + /* Copy information about dimension names. */ + memcpy(ret->hdr.dimNames, hist->hdr.dimNames, MHIST_NAMESZ); + MHIST_SET_NUM_DIM_NAMES(ret, MHIST_NUM_DIM_NAMES(hist)); + + /* Set the correct number of buckets used. 
*/ + MHIST_SET_NUM_BUCKETS_USED(ret, MHIST_NUM_BUCKETS_USED(hist)); + + /* Copy the buckets en masse. */ + memcpy(ret->buckets, hist->buckets, + MHIST_NUM_BUCKETS_USED(hist) + * MHIST_BUCKET_LEN(MHIST_NUM_DIMS(hist))); + + return ret; +} + + +/* FUNCTION real_mhist_in + * ARGUMENTS: <relname> is the name of the relation on which to build a + * histogram. <cols> and <ncols> represent which columns of the relation + * to build on, and <num_buckets> is the number of buckets to use. + * PRECONDITIONS: All strings are non-NULL, and the number of buckets is + * greater than zero. + * POSTCONDITIONS: Attempts to create an mhist on the indicated relation. + * Calls elog() on failure. + */ +static mhist * real_mhist_in(char *relname, + char **cols, + int ncols, + int num_buckets) +{ + asdf("real_mhist_in"); + mhist *result = NULL; + + /* Create the histogram data structure. */ + result = create_mhist(num_buckets, ncols); + + /* Fill in information about the different dimensions. */ + mhist_set_nameinfo(result, relname, cols, ncols); + + + /* Load the histogram. */ + load_mhist(relname, cols, result); + + /* Pass our newly-minted histogram back up the stack. */ + zxcv("real_mhist_in"); + return result; +} + +/* FUNCTION mhist_set_nameinfo + * ARGUMENTS: <relname> is a relation name, and <cols> is an array of column + * names. <hist> is an MHIST. + * PRECONDITIONS: The histogram is to be built on the indicated columns of the + * indicated relation, with dimensions in the same order as <cols>. + * POSTCONDITIONS: Fills in the dimension name information of the histogram. 
+ */ +static void mhist_set_nameinfo(mhist *hist, char *relname, + char **cols, int ncols) { + int dimid = -1; + + for (dimid = 0; dimid < ncols; dimid++) { + MHIST_SET_NAME_DIM(hist, dimid, dimid); + snprintf(MHIST_DIM_NAME(hist, dimid), MHIST_COL_NAME_LEN, + "%s.%s", relname, cols[dimid]); + /* We're assuming that the entire histogram was zeroed out; + * otherwise, we would have to pad the rest of the name field with + * zeros to avoid screwing up postgres hashing. */ + } + + MHIST_SET_NUM_DIM_NAMES(hist, ncols); +} + + + +/* FUNCTION get_table_as_array + * ARGUMENTS: <relname>, <cols>, and <ncols> are as with real_mhist_in(). + * <ntuples> points to a location where we will put the number of tuples + * read. <descdest> points to where we will put a descriptor for the + * tuples in the array we return. + * POSTCONDITIONS: Creates an array of HeapTuples of the appropriate size + * and loads it with the contents of the indicated columns. Returns + * the array, and sticks a tuple descriptor into *descdest. + */ +static HeapTuple *get_table_as_array(char *relname, char **cols, int ncols, + int *ntuples, TupleDesc *descdest) { + asdf("get_table_as_array"); + + StringInfo sql = makeStringInfo(); + int dimid = -1, return_code = -1, proc = -1; + int tupnum = -1; + + HeapTuple *ret = NULL; + /* Will be returned. */ + + MemoryContext main_context; + MemoryContext oldcontext; + + Assert(ntuples != NULL); + + + /* First, we snarf all the data out of the table. At some point in the + * future, we will probably do some sampling here, but for now, we just + * snarf. + * + * In order to get at the tuples, we create a query, namely: + * + * SELECT col1, col2, ... 
, coln FROM relation; + */ + appendStringInfo(sql, "SELECT "); + + for (dimid = 0; dimid < ncols; dimid++) { + + appendStringInfo(sql, "%s.%s", relname, cols[dimid]); + if (dimid < ncols - 1) { + appendStringInfo(sql, ", "); + } + } + appendStringInfo(sql, " FROM %s" , relname); + + elog(LOG, "get_table_as_array(): Query is '%s'.", sql->data); + + /* The Server Programming Interface is going to switch memory contexts + * behind our back, so we need to remember what the original context was. + */ + main_context = CurrentMemoryContext; + + /* Connect to the Server Programming Interface (see Postgresql Programmer's + * Reference, Chapter 17). */ + if ((return_code = SPI_connect()) < 0) { + elog(ERROR, "get_table_as_array(): SPI_connect returned %d.", + return_code); + } + + /* Execute the query. */ + SetQuerySnapshot(); /* Don't know why we need to do this. */ + return_code = SPI_exec(sql->data, 0); + /* The 0 here for number of tuples to retrieve apparently means to get + * as many tuples as possible. */ + proc = SPI_processed; + + elog(LOG, "get_table_as_array(): Query generated %d tuples.", proc); + + /* Make a copy of the tuple descriptor. See note above about memory + * contexts... */ + oldcontext = MemoryContextSwitchTo(main_context); + *descdest = CreateTupleDescCopy(SPI_tuptable->tupdesc); + MemoryContextSwitchTo(oldcontext); + + + /* SPECIAL CASE: Empty result. */ + if (0 == proc) { + *ntuples = 0; + + /* The caller should free the returned pointer unconditionally, so we + * need to allocate _something_. */ + /* See note above about memory contexts. */ + oldcontext = MemoryContextSwitchTo(main_context); + ret = palloc(1); + MemoryContextSwitchTo(oldcontext); + + goto cleanup; + } + /* END SPECIAL CASE */ + + /* Fetch the result tuples and put them into a giant array. */ + *ntuples = proc; + + /* See note above about memory contexts. 
*/ + oldcontext = MemoryContextSwitchTo(main_context); + ret = palloc(proc * sizeof(HeapTuple)); + MemoryContextSwitchTo(oldcontext); + + for (tupnum = 0; tupnum < proc; tupnum++) { + HeapTuple current_tup; + + /* See note above about memory contexts. */ + oldcontext = MemoryContextSwitchTo(main_context); + current_tup = heap_copytuple(SPI_tuptable->vals[tupnum]); + MemoryContextSwitchTo(oldcontext); + + ret[tupnum] = current_tup; + } + + +cleanup: + + /* Disconnect from the Server Programming Interface. */ + if ((return_code = SPI_finish()) != SPI_OK_FINISH) { + elog(ERROR, "get_table_as_array(): SPI_finish() returned %d.", + return_code); + } + + zxcv("get_table_as_array"); + + return ret; +} + +/* FUNCTION split_a_bucket() + * ARGUMENTS: <giant_tup_array> is an array containing <ntuples> tuples, + * currently divided into (contiguous) buckets at locations given by + * <bucket_offsets> and <bucket_lengths>. + * PRECONDITIONS: <giant_tup_array> has been properly divided up. + * POSTCONDITIONS: If possible, splits one of the buckets and redistributes + * tuples accordingly. Updates the <bucket_offsets> and <bucket_lengths> + * arrays. Returns TRUE if it managed to split a bucket, FALSE + * otherwise. + */ +static bool split_a_bucket(HeapTuple *giant_tup_array, int ntuples, + TupleDesc desc, + int *bucket_offsets, int *bucket_lengths, mhist *hist, + mhist_dim_info *ranges, int nbuckets_done) { + + asdf("split_a_bucket"); + + int split_bucketid = -1; + int split_dimid = -1; + int split_offset = -1; + + int offset = -1, length = -1; + HeapTuple *bucketstart = NULL; + + mhist_bucket *maxdiff_bucket_ptr = NULL; + mhist_bucket *new_bucket_ptr = MHIST_BUCKET_PTR(hist, nbuckets_done); + /* The actual bucket (currently empty) */ + + mhist_dim_info *maxdiff_di = NULL; + mhist_dim_info *newbucket_di = NULL; + + int split_val = -1; + + int ndims = (NULL == desc) ? 
0 : get_num_non_timestamp_attrs(desc); + + Assert(0.0 == new_bucket_ptr->count); + + /*elog(LOG, "split_a_bucket(): Checking whether we can split.");*/ + + /* SPECIAL CASE: Can't perform any splits. */ + if (no_buckets_to_split(giant_tup_array, bucket_offsets, bucket_lengths, + nbuckets_done, desc)) { + return false; + } + /* END SPECIAL CASE */ + + /*elog(LOG, "split_a_bucket(): Figuring out where to split.");*/ + + /* Figure out where to split. */ + get_mhist_split_point(giant_tup_array, desc, bucket_offsets, + bucket_lengths, nbuckets_done, ranges, + &split_bucketid, &split_dimid, &split_offset); + + /*elog(LOG, "split_a_bucket(): Starting split.");*/ + + /* Now we split the chosen bucket along the chosen dimension. */ + maxdiff_bucket_ptr = MHIST_BUCKET_PTR(hist, split_bucketid); + offset = bucket_offsets[split_bucketid]; + length = bucket_lengths[split_bucketid]; + bucketstart = giant_tup_array + offset; + + /*elog(LOG, "split_a_bucket(): Sorting.");*/ + + /* First we sort along the dimension. */ + sort_tuples_along_dim(bucketstart, length, split_dimid, desc); + + /*elog(LOG, "split_a_bucket(): Performing bookkeeping.");*/ + + /* The sorting has already partitioned the tuples; we just need to + * make note of the new bucket boundary! */ + bucket_offsets[nbuckets_done] = + bucket_offsets[split_bucketid] + split_offset + 1; + + + /* The new bucket starts immediately after the current + * tuple. */ + bucket_lengths[nbuckets_done] = + bucket_lengths[split_bucketid] - split_offset - 1; + + bucket_lengths[split_bucketid] = split_offset; + + + /* Adjust the bucket boundaries in the histogram data structure. */ + + /* We start out by making a copy of the bucket we just split. */ + memcpy(new_bucket_ptr, maxdiff_bucket_ptr, MHIST_BUCKET_LEN(ndims)); + + /* The we adjust the boundaries of the copies so that they split + * the space along the maxdiff line. 
*/ + maxdiff_di = &(maxdiff_bucket_ptr->diminfo[split_dimid]); + newbucket_di = &(new_bucket_ptr->diminfo[split_dimid]); + + split_val = get_int_field_val(bucketstart[split_offset], desc, + split_dimid + 1, true); + /* + elog(LOG, "split_a_bucket(): Splitting at value %d on dimension %d.", + split_val, split_dimid); + */ + + /* By convention, the bucket contains values x, such that + * min_val <= x <= max_val. + */ + maxdiff_di->max_val = split_val - 1; + newbucket_di->min_val = split_val; + + + /* We've created a hard boundary, so these two buckets should no + * longer accept outliers in the direction of the boundary. */ + maxdiff_di->accept_above_max = false; + newbucket_di->accept_below_min = false; + + zxcv("split_a_bucket"); + + return true; +} + +/* Comparison function for passing to qsort. The globals before it are used to + * pass parameters to it on a "side channel". */ +static int global_cmpf_dimid = -1; +static TupleDesc global_cmpf_desc = NULL; +static int cmpf(const void * left, const void * right) { + HeapTuple *left_ptr = (HeapTuple*)left; + HeapTuple *right_ptr = (HeapTuple*)right; + int left_val = get_int_field_val(*left_ptr, global_cmpf_desc, + global_cmpf_dimid + 1, true); + int right_val = get_int_field_val(*right_ptr, global_cmpf_desc, + global_cmpf_dimid + 1, true); + + /* We return a number less than zero if left < right */ + if (left_val < right_val) { + return -1; + } else if (left_val == right_val) { + return 0; + } else { + return 1; + } +} + +/* FUNCTION sort_tuples_along_dim() + * ARGUMENTS: <tuples> points to an array of <length> tuples, each of <ndims> + * dimensions. <dimid> is a dimension < <ndims>. + * PRECONDITIONS: The specified range stays within the actual bounds of the + * array. + * POSTCONDITIONS: Sorts the range of tuples along dimension <dimid>. 
+ */ +static void sort_tuples_along_dim(HeapTuple *tuples, int length, int dimid, + TupleDesc desc) { + + asdf("sort_tuples_along_dim"); + + /* Tell our comparison function which dimension to pay attention to. */ + global_cmpf_dimid = dimid; + global_cmpf_desc = desc; + + /* Then we can let qsort do the heavy lifting... */ + qsort(tuples, length, sizeof(*tuples), &cmpf); + + zxcv("sort_tuples_along_dim"); +} + + + +/* FUNCTION load_mhist + * ARGUMENTS: <relname> and <cols> are as with real_mhist_in(), and + * <hist> is an appropriately-sized and initialized MHIST data + * structure. + * PRECONDITIONS: The histogram is initial... [truncated message content] |