From: Jouke W. <j.w...@gm...> - 2015-01-03 11:42:13
|
With a default factor identifier, the expensive filter_boxplot_factor routine can be made obsolete. This leads to more efficient plotting of boxplots. --- diff -Naur gnuplot-5.0.0.orig/src/graphics.c gnuplot-5.0.0/src/graphics.c --- gnuplot-5.0.0.orig/src/graphics.c 2014-11-22 05:12:53.000000000 +0100 +++ gnuplot-5.0.0/src/graphics.c 2015-01-03 01:24:35.787529832 +0100 @@ -107,7 +107,6 @@ static void plot_c_bars __PROTO((struct curve_points * plot)); static int compare_ypoints __PROTO((SORTFUNC_ARGS arg1, SORTFUNC_ARGS arg2)); static void plot_boxplot __PROTO((struct curve_points * plot)); -static int filter_boxplot_factor __PROTO((struct curve_points *plot, int level)); static void place_labels __PROTO((struct text_label * listhead, int layer, TBOOLEAN clip)); static void place_arrows __PROTO((int layer)); @@ -2682,110 +2681,57 @@ } } -int -filter_boxplot(struct curve_points *plot) -{ - int i; - int N = plot->p_count; - - /* Force any undefined points to the end of the list */ - for (i=0; i<N; i++) - if (plot->points[i].type == UNDEFINED) - plot->points[i].y = VERYLARGE; - - /* Sort the points to find median and quartiles */ - qsort(plot->points, N, sizeof(struct coordinate), compare_ypoints); - - /* Remove any undefined points */ - while (plot->points[N-1].type == UNDEFINED) - N--; - plot->p_count = N; - - return N; -} - -static int -filter_boxplot_factor(struct curve_points *plot, int level) -{ - int i, real_level; - int N = plot->p_count; - - /* Do we have to show the boxplots in alphabetical order of factors? */ - if (boxplot_opts.sort_factors && plot->boxplot_factor_order) - real_level = plot->boxplot_factor_order[level]; - else - real_level = level; - - /* If the factor doesn't match, - * change the point to undefined and force it to the end of the list */ - for (i=0; i<N; i++) { - plot->points[i].y = plot->points[i].yhigh; - plot->points[i].type = INRANGE; - if (plot->points[i].ylow != real_level) { - plot->points[i].type = UNDEFINED; - plot->points[i].y = VERYLARGE; - FPRINTF((stderr, "filter_boxplot_factor: rejecting point: level %d, factor %g, i %d\n", level, plot->points[i].ylow, i)); - } - } - - /* Sort the points to find median and quartiles */ - qsort(plot->points, N, sizeof(struct coordinate), compare_ypoints); - - /* Remove any undefined points */ - while (plot->points[N-1].type == UNDEFINED) - N--; - plot->p_count = N; - - return N; -} - static void plot_boxplot(struct curve_points *plot) { int N; - int saved_p_count; - struct coordinate *save_points = plot->points; + int remaining = plot->p_count; + int saved_p_count = plot->p_count; + struct coordinate GPHUGE *current_points = plot->points; + struct coordinate GPHUGE *saved_points = plot->points; struct coordinate candle; + double delta_x = 0; double median, quartile1, quartile3; double whisker_top, whisker_bot; - int level; - int levels = plot->boxplot_factors; - if (levels == 0) - levels = 1; - saved_p_count = plot->p_count; - - for (level=0; level<levels; level++) { - /* Sort the points and get rid of any that are undefined */ - /* EAM Feb 2011: Move this to boxplot_range_fiddling() */ - /* N = filter_boxplot(plot); */ - /* but we need filtering to make factored boxplots work: */ - if (levels > 1) { - plot->p_count = saved_p_count; - N = filter_boxplot_factor(plot, level); - } - else - N = plot->p_count; + + /* set N to the distance towards the first boxplot to plot */ + N = 0; + if (plot->boxplot_factors > 0) + while (N<remaining && current_points[N].ylow < 0) N++; + + do { + /* move to the next boxplot and count the number of its points */ + remaining -= N; + if (remaining == 0) + break; + current_points += N; + for (N = 0; + N < remaining && current_points[N].ylow == current_points->ylow; + N++); + + /* Sort the points to find median and quartiles */ + qsort(current_points, N, sizeof(struct coordinate), compare_ypoints); /* Not enough points left to make a boxplot */ if (N < 4) { - candle.x = plot->points->x + boxplot_opts.separation * level; + candle.x = current_points->x + delta_x; candle.yhigh = -VERYLARGE; candle.ylow = VERYLARGE; goto outliers; } if ((N & 0x1) == 0) - median = 0.5 * (plot->points[N/2 - 1].y + plot->points[N/2].y); + median = 0.5 * (current_points[N/2 - 1].y + current_points[N/2].y); else - median = plot->points[(N-1)/2].y; + median = current_points[(N-1)/2].y; if ((N & 0x3) == 0) - quartile1 = 0.5 * (plot->points[N/4 - 1].y + plot->points[N/4].y); + quartile1 = 0.5 * (current_points[N/4 - 1].y + current_points[N/4].y); else - quartile1 = plot->points[(N+3)/4 - 1].y; + quartile1 = current_points[(N+3)/4 - 1].y; if ((N & 0x3) == 0) - quartile3 = 0.5 * (plot->points[N - N/4].y + plot->points[N - N/4 - 1].y); + quartile3 = 0.5 * (current_points[N - N/4].y + current_points[N - N/4 - 1].y); else - quartile3 = plot->points[N - (N+3)/4].y; + quartile3 = current_points[N - (N+3)/4].y; FPRINTF((stderr,"Boxplot: quartile boundaries for %d points: %g %g %g\n", N, quartile1, median, quartile3)); @@ -2797,14 +2743,14 @@ int i; whisker_bot = quartile1 - whisker_len; for (i=0; i<N; i++) - if (plot->points[i].y >= whisker_bot) { - whisker_bot = plot->points[i].y; + if (current_points[i].y >= whisker_bot) { + whisker_bot = current_points[i].y; break; } whisker_top = quartile3 + whisker_len; for (i=N-1; i>= 0; i--) - if (plot->points[i].y <= whisker_top) { - whisker_top = plot->points[i].y; + if (current_points[i].y <= whisker_top) { + whisker_top = current_points[i].y; break; } @@ -2815,16 +2761,16 @@ int top = N-1; int bot = 0; while ((double)(top-bot+1)/(double)(N) >= boxplot_opts.limit_value) { - whisker_top = plot->points[top].y; - whisker_bot = plot->points[bot].y; + whisker_top = current_points[top].y; + whisker_bot = current_points[bot].y; if (whisker_top - median >= median - whisker_bot) { top--; - while ((top > 0) && (plot->points[top].y == plot->points[top-1].y)) + while ((top > 0) && (current_points[top].y == current_points[top-1].y)) top--; } if (whisker_top - median <= median - whisker_bot) { bot++; - while ((bot < top) && (plot->points[bot].y == plot->points[bot+1].y)) + while ((bot < top) && (current_points[bot].y == current_points[bot+1].y)) bot++; } } @@ -2833,14 +2779,14 @@ /* Dummy up a single-point candlesticks plot using these limiting values */ candle.type = INRANGE; if (plot->plot_type == FUNC) - candle.x = (plot->points[0].x + plot->points[N-1].x) / 2.; + candle.x = (current_points[0].x + current_points[N-1].x) / 2.; else - candle.x = plot->points->x + boxplot_opts.separation * level; + candle.x = current_points->x + delta_x; candle.y = quartile1; candle.z = quartile3; candle.ylow = whisker_bot; candle.yhigh = whisker_top; - candle.xlow = plot->points->xlow + boxplot_opts.separation * level; + candle.xlow = current_points->xlow + delta_x; candle.xhigh = median; /* Crazy order of candlestick parameters! */ plot->points = &candle; plot->p_count = 1; @@ -2850,26 +2796,23 @@ else plot_c_bars( plot ); - plot->points = save_points; - plot->p_count = N; - /* Now draw individual points for the outliers */ outliers: if (boxplot_opts.outliers) { int i,j,x,y; p_width = plot->lp_properties.p_size * term->h_tic; - for (i = 0; i < plot->p_count; i++) { + for (i = 0; i < N; i++) { - if (plot->points[i].y >= candle.ylow - && plot->points[i].y <= candle.yhigh) + if (current_points[i].y >= candle.ylow + && current_points[i].y <= candle.yhigh) continue; - if (plot->points[i].type != INRANGE) + if (current_points[i].type != INRANGE) continue; x = map_x(candle.x); - y = map_y(plot->points[i].y); + y = map_y(current_points[i].y); /* do clipping if necessary */ if (clip_points && (x < plot_bounds.xleft + p_width @@ -2880,13 +2823,16 @@ } /* Separate any duplicate outliers */ - for (j=1; (i >= j) && (plot->points[i].y == plot->points[i-j].y); j++) + for (j=1; (i >= j) && (current_points[i].y == current_points[i-j].y); j++) x += p_width * ((j & 1) == 0 ? -j : j);; (term->point) (x, y, plot->lp_properties.p_type); } } - } + delta_x += boxplot_opts.separation; + } while (current_points->ylow + 1 < plot->boxplot_factors); + plot->points = saved_points; + plot->p_count = saved_p_count; } diff -Naur gnuplot-5.0.0.orig/src/graphics.h gnuplot-5.0.0/src/graphics.h --- gnuplot-5.0.0.orig/src/graphics.h 2013-12-26 19:52:27.000000000 +0100 +++ gnuplot-5.0.0/src/graphics.h 2015-01-02 22:54:00.944166721 +0100 @@ -71,7 +71,6 @@ enum PLOT_SMOOTH plot_smooth; /* which "smooth" method to be used? */ double smooth_parameter; /* e.g. optional bandwidth for smooth kdensity */ int boxplot_factors; /* Only used if plot_style == BOXPLOT */ - int *boxplot_factor_order; /* Only used if plot_style == BOXPLOT */ int p_max; /* how many points are allocated */ int p_count; /* count of points in points */ int x_axis; /* FIRST_X_AXIS or SECOND_X_AXIS */ @@ -112,6 +111,4 @@ #define place_objects(listhead,layer,dimensions) /* void() */ #endif -int filter_boxplot __PROTO((struct curve_points *)); - #endif /* GNUPLOT_GRAPHICS_H */ diff -Naur gnuplot-5.0.0.orig/src/plot2d.c gnuplot-5.0.0/src/plot2d.c --- gnuplot-5.0.0.orig/src/plot2d.c 2015-01-03 01:34:10.374844052 +0100 +++ gnuplot-5.0.0/src/plot2d.c 2015-01-03 02:41:33.675309714 +0100 @@ -77,6 +77,8 @@ static void add_tics_boxplot_factors __PROTO((struct curve_points *plot)); static void sort_boxplot_factors __PROTO((struct curve_points *plot)); static int compare_boxplot_factors __PROTO((SORTFUNC_ARGS arg1, SORTFUNC_ARGS arg2)); +static int filter_boxplot_factor __PROTO((struct curve_points *plot, int level)); +static int compare_ylow __PROTO((SORTFUNC_ARGS arg1, SORTFUNC_ARGS arg2)); /* internal and external variables */ @@ -187,8 +189,6 @@ if (cp->labels) free_labels(cp->labels); cp->labels = NULL; - free(cp->boxplot_factor_order); - cp->boxplot_factor_order = NULL; if (cp->z_n) { int i; for (i = 0; i < cp->n_par_axes; i++) @@ -1587,9 +1587,7 @@ * in the order they were found in the file, and these numbers were written into the * plot->points structure. To change the order in which they will be plotted, * we have to sort the factor strings (which are stored in plot->labels), then - * fill plot->boxplot_factor_order with the order we've found. This is essentially - * a permutation of the sequence 0:N-1 where N is the number of levels the factor - * variable has (plot->boxplot_factors). */ + * update all the factor levels in plot->points. */ static void sort_boxplot_factors(struct curve_points *plot) { @@ -1602,7 +1600,7 @@ if (plot->boxplot_factors < 2 || !plot->labels || !plot->labels->next) return; - temp_labels = gp_alloc(plot->boxplot_factors * sizeof(temp_labels), "boxplot labels array"); + temp_labels = gp_alloc(plot->boxplot_factors * sizeof(text_label *), "boxplot labels array"); permutation = gp_alloc(plot->boxplot_factors * sizeof(int), "boxplot permutations array"); /* fill pointer array by walking the linked list */ @@ -1615,23 +1613,70 @@ } /* sort pointer array */ qsort(temp_labels, plot->boxplot_factors, sizeof(text_label *), compare_boxplot_factors); - - /* read out the tags from the text_labels from the - now sorted - pointer array, - * and write them into the permutation array in order */ + + /* update the tags from the text_labels of the - now sorted - pointer array, + * and construct a translation pemutation */ for (i=0; i<plot->boxplot_factors; i++) { - permutation[i] = temp_labels[i]->tag; + permutation[temp_labels[i]->tag] = i; + temp_labels[i]->tag = i; + } + + /* update all factors */ + /* this simplifies the plotting routine, as all boxplots end up in order */ + for (i=0; i<plot->p_count; i++) { + struct coordinate *point = &plot->points[i]; + if (point->type == UNDEFINED || point->ylow < 0 || point->ylow >= plot->boxplot_factors) + continue; + point->ylow = permutation[(int)point->ylow]; } /* and also re-link the list in sorted order for the tics */ this_label = plot->labels->next = temp_labels[0]; - for (i=0; i<plot->boxplot_factors-1; i++) { - this_label->next = temp_labels[i+1]; + for (i=1; i<plot->boxplot_factors; i++) { + this_label->next = temp_labels[i]; this_label = this_label->next; } this_label->next = NULL; free(temp_labels); - plot->boxplot_factor_order = permutation; + free(permutation); +} + +static int +compare_ylow(SORTFUNC_ARGS arg1, SORTFUNC_ARGS arg2) +{ + struct coordinate const *p1 = arg1; + struct coordinate const *p2 = arg2; + + if (p1->ylow > p2->ylow) + return (1); + if (p1->ylow < p2->ylow) + return (-1); + return (0); +} + +static int +filter_boxplot(struct curve_points *plot) +{ + int i; + int N = plot->p_count; + + /* Force any undefined points to the end of the list */ + for (i=0; i<N; i++) + if (plot->points[i].type == UNDEFINED) + plot->points[i].ylow = VERYLARGE; + + /* Group by the levels of the boxplot factors */ + if (plot->boxplot_factors > 1 && boxplot_opts.sort_factors) + sort_boxplot_factors(plot); + qsort(plot->points, N, sizeof(struct coordinate), compare_ylow); + + /* Remove any undefined points */ + while (plot->points[N-1].type == UNDEFINED) + N--; + plot->p_count = N; + + return N; } /* Autoscaling of box plots cuts off half of the box on each end. */ @@ -2821,12 +2866,10 @@ } /* if needed, sort boxplot factors and assign tic labels to them */ - if (this_plot->plot_style == BOXPLOT && this_plot->boxplot_factors > 0) { - if (boxplot_opts.sort_factors) - sort_boxplot_factors(this_plot); - if (boxplot_opts.labels != BOXPLOT_FACTOR_LABELS_OFF) - add_tics_boxplot_factors(this_plot); - } + if (this_plot->plot_style == BOXPLOT + && this_plot->boxplot_factors > 0 + && boxplot_opts.labels != BOXPLOT_FACTOR_LABELS_OFF) + add_tics_boxplot_factors(this_plot); /* sort */ switch (this_plot->plot_smooth) { diff -Naur gnuplot-5.0.0.orig/src/set.c gnuplot-5.0.0/src/set.c --- gnuplot-5.0.0.orig/src/set.c 2014-11-23 19:29:17.000000000 +0100 +++ gnuplot-5.0.0/src/set.c 2015-01-02 19:42:41.906236781 +0100 @@ -1032,7 +1032,7 @@ c_token++; boxplot_opts.separation = real_expression(); if (boxplot_opts.separation < 0) - int_error(c_token-1,"separation must be > 0"); + int_error(c_token-1,"separation must be non-negative"); } else if (almost_equals(c_token,"lab$els")) { c_token++; |