From: Robert D. <rob...@us...> - 2016-06-29 20:55:20
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "Maxima CAS". The branch, master has been updated via 97f64687a1f35f14a51d056f3730eab151b2bd7c (commit) via c55e7915e3a2a009dd0e511f3761bfb452a6bda9 (commit) via d1e854b8dff32e72b91fc4eaee3dfb5e888afe72 (commit) via 3e1fa689f457a665708e8f66b68d33cd4fbc4211 (commit) via 7d7ed0d2864fb769ebfdc9008b3a1271b6279113 (commit) via 0907d88a8e0d11eff512ed250488574bfb33e97b (commit) via dc8ef8d3164800fbb050c28780d16af634952c17 (commit) from 468eb90cbbcc4e5c7ae16c2a2451a30250d05c54 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 97f64687a1f35f14a51d056f3730eab151b2bd7c Merge: c55e791 468eb90 Author: Robert Dodier <rob...@us...> Date: Wed Jun 29 13:53:47 2016 -0700 Merge branch 'master' of ssh://git.code.sf.net/p/maxima/code commit c55e7915e3a2a009dd0e511f3761bfb452a6bda9 Author: Robert Dodier <rob...@us...> Date: Wed Jun 29 13:53:11 2016 -0700 In continuous_freq, handle empty input correctly. diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index 00924f5..65b4a80 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -245,27 +245,30 @@ build_sample(tbl) := /* If sample values are all equal, this function returns only */ /* one class of amplitude 2 */ continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins], - if length(opt) = 1 and listofnumbersp(opt[1]) - then ( mini: opt[1][1], - maxi: opt[1][2], - if length(opt[1]) = 3 /* min, max, and number of classes given */ - then nc: opt[1][3] - elseif length(opt[1]) = 2 /* min and max */ - then nc:10 - else error("continuous_freq: incorrect number of elements in optional argument.")) - else ( mini: lmin(lis), - maxi: lmax(lis), - if length(opt)=1 - then nc:opt[1] /* only number of classes given */ - else nc:10 ), /* default number of classes */ - lim:[mini], - amp:(maxi-mini)/nc, - if amp=0 - then [[lis[1]-1,lis[1]+1],[length(lis)]] - else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim), - bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1), - fr : count_by_bins (lis, bins), - [lim,fr]) )$ + if length(lis) = 0 + then [[minf, inf], [0]] + else + (if length(opt) = 1 and listofnumbersp(opt[1]) + then ( mini: opt[1][1], + maxi: opt[1][2], + if length(opt[1]) = 3 /* min, max, and number of classes given */ + then nc: opt[1][3] + elseif length(opt[1]) = 2 /* min and max */ + then nc:10 + else error("continuous_freq: incorrect number of elements in optional argument.")) + else ( mini: lmin(lis), + maxi: lmax(lis), + if length(opt)=1 + then nc:opt[1] /* only number of classes given */ + else nc:10 ), /* default number of classes */ + lim:[mini], + amp:(maxi-mini)/nc, + if amp=0 + then [[lis[1]-1,lis[1]+1],[length(lis)]] + else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim), + bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1), + fr : count_by_bins (lis, bins), + [lim,fr])) )$ count_by_bins (xx, bins) := block ([counts : makelist (0, length (bins))], xx : sort (xx), diff --git a/share/descriptive/rtest_descriptive.mac b/share/descriptive/rtest_descriptive.mac index 8e331d0..c711fe0 100644 --- a/share/descriptive/rtest_descriptive.mac +++ b/share/descriptive/rtest_descriptive.mac @@ -233,6 +233,9 @@ principal_components(s2); /* DATA MANIPULATION */ +continuous_freq ([]); +[[minf, inf], [0]]; + continuous_freq (s1, 5); [[0,9/5,18/5,27/5,36/5,9],[16,24,18,17,25]]$ commit d1e854b8dff32e72b91fc4eaee3dfb5e888afe72 Author: Robert Dodier <rob...@us...> Date: Wed Jun 29 13:42:08 2016 -0700 In discrete_freq, count data by sorting and then searching for indices. This is faster than the previous method. Also include more tests for discrete_freq. diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index c0533ae..00924f5 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -311,19 +311,14 @@ find_index_last_1 (xx, x, i0, i1, comparison) := /* Counts the frequency of each element in 'lis', its elements */ /* can be numbers, Maxima expressions or strings. */ -discrete_freq(lis):=block([n:length(lis),fr:[],sum,val,set:[],c], - lis: sort(lis), - c:1, - while c<=n do( - sum: 0, - val: lis[c], - set: endcons(val,set), - while c<=n and lis[c]=val do( - sum: sum+1, - c: c+1), - fr: endcons(sum,fr) ), - [set,fr] )$ +discrete_freq (l) := block ([u], + l : sort (l), + u : unique (l), + [u, map (lambda ([u1], find_index_last (l, u1, lambda ([x,y], not ordergreatp(x, y))) + - find_index_first (l, u1, lambda([x,y], not orderlessp(x, y))) + + 1), + u)]); /* UNIVARIATE DESCRIPTIVE STATISTICS */ diff --git a/share/descriptive/rtest_descriptive.mac b/share/descriptive/rtest_descriptive.mac index 0f4243e..8e331d0 100644 --- a/share/descriptive/rtest_descriptive.mac +++ b/share/descriptive/rtest_descriptive.mac @@ -287,9 +287,50 @@ true; test_continuous_freq (append (makelist (random (10), 50), 50 + makelist (random (10), 50)), 10); true; +discrete_freq ([]); +[[], []]; + discrete_freq (s1); [[0,1,2,3,4,5,6,7,8,9],[8,8,12,12,10,8,9,8,12,13]]$ +discrete_freq (map (lambda ([x], printf (false, "~r", x)), s1)); +[["eight", "five", "four", "nine", "one", "seven", "six", "three", "two", "zero"], + [12, 8, 10, 13, 8, 8, 9, 12, 12, 8]]; + +discrete_freq (map (lambda ([x], concat ('x, printf (false, "~r", x))), s1)); +['[xeight, xfive, xfour, xnine, xone, xseven, xsix, xthree, xtwo, xzero], + [12, 8, 10, 13, 8, 8, 9, 12, 12, 8]]; + +(test_discrete_freq (xx) := block ([output, counts], + output : discrete_freq (xx), + counts : count_via_sublist (xx), + if counts = output[2] + then true + else ['data = xx, 'discrete_freq = output, 'count_via_sublist = counts]), + count_via_sublist (xx) := + map (lambda ([x1], length (sublist (xx, lambda ([x], x = x1)))), unique (xx)), + set_random_state (make_random_state (2)), + 0); +0; + +test_discrete_freq (makelist (random (20), 1)); +true; + +test_discrete_freq (makelist (random (20), 10)); +true; + +test_discrete_freq (makelist (random (20), 100)); +true; + +test_discrete_freq (makelist (random (20), 1000)); +true; + +test_discrete_freq (makelist (random (100), 100) * 0.25); +true; + +test_discrete_freq (append (makelist (random (20), 50), 50 + makelist (random (20), 50))); +true; + part(standardize(s1), [1, 10, 20]); [-171/sqrt(84259),-171/sqrt(84259),-71/sqrt(84259)]$ commit 3e1fa689f457a665708e8f66b68d33cd4fbc4211 Author: Robert Dodier <rob...@us...> Date: Wed Jun 29 12:01:15 2016 -0700 In continuous_freq, do not bother to compile count_by_bins and friends, as it generates confusing warning messages, and does not appear to make continuous_freq any faster. diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index b2c0026..c0533ae 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -308,8 +308,6 @@ find_index_last_1 (xx, x, i0, i1, comparison) := then find_index_last_1 (xx, x, i, i1, comparison) else find_index_last_1 (xx, x, i0, i, comparison)); -compile (count_by_bins, find_index_first, find_index_first_1, find_index_last, find_index_last_1); - /* Counts the frequency of each element in 'lis', its elements */ /* can be numbers, Maxima expressions or strings. */ commit 7d7ed0d2864fb769ebfdc9008b3a1271b6279113 Author: Robert Dodier <rob...@us...> Date: Wed Jun 29 10:13:00 2016 -0700 In continuous_freq, rename count_em_up to count_by_bins, and have it return the bin counts instead of assigning values to an argument. diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index 58cf727..b2c0026 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -263,18 +263,18 @@ continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins], if amp=0 then [[lis[1]-1,lis[1]+1],[length(lis)]] else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim), - fr:makelist(0,i,1,nc), bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1), - count_em_up (lis, bins, fr), + fr : count_by_bins (lis, bins), [lim,fr]) )$ -count_em_up (xx, bins, counts) := - (xx : sort (xx), +count_by_bins (xx, bins) := block ([counts : makelist (0, length (bins))], + xx : sort (xx), for k thru length (bins) do block ([i_first : find_index_first (xx, bins[k][1], if k > 1 and bins[k][1] = bins[k - 1][2] then ">" else ">="), i_last : find_index_last (xx, bins[k][2], "<=")], - counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1)); + counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1), + counts); /* assume xx is sorted; find least i s.t. xx[i] > x or xx[i] >= x */ @@ -308,7 +308,7 @@ find_index_last_1 (xx, x, i0, i1, comparison) := then find_index_last_1 (xx, x, i, i1, comparison) else find_index_last_1 (xx, x, i0, i, comparison)); -compile (count_em_up, find_index_first, find_index_first_1, find_index_last, find_index_last_1); +compile (count_by_bins, find_index_first, find_index_first_1, find_index_last, find_index_last_1); /* Counts the frequency of each element in 'lis', its elements */ commit 0907d88a8e0d11eff512ed250488574bfb33e97b Author: Robert Dodier <rob...@us...> Date: Tue Jun 28 21:20:08 2016 -0700 In continuous_freq, exclude the left end of the bin and include the right end. This is consistent with the default behavior of hist and truehist in R (and also consistent with the previous behavior of continuous_freq). diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index e48273f..58cf727 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -271,9 +271,9 @@ continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins], count_em_up (xx, bins, counts) := (xx : sort (xx), for k thru length (bins) - do block ([i_first : find_index_first (xx, bins[k][1], ">="), - i_last : find_index_last (xx, bins[k][2], - if k < length(bins) and bins[k + 1][1] = bins[k][2] then "<" else "<=")], + do block ([i_first : find_index_first (xx, bins[k][1], + if k > 1 and bins[k][1] = bins[k - 1][2] then ">" else ">="), + i_last : find_index_last (xx, bins[k][2], "<=")], counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1)); /* assume xx is sorted; find least i s.t. xx[i] > x or xx[i] >= x */ diff --git a/share/descriptive/rtest_descriptive.mac b/share/descriptive/rtest_descriptive.mac index 57ebcc2..0f4243e 100644 --- a/share/descriptive/rtest_descriptive.mac +++ b/share/descriptive/rtest_descriptive.mac @@ -251,8 +251,8 @@ continuous_freq (s1, [-2,12]); then true else ['data = xx, 'continuous_freq = output, 'count_via_sublist = counts]), count_via_sublist (xx, bins) := - append (map (lambda ([pq], length (sublist (xx, lambda ([x], pq[1] <= x and x < pq[2])))), rest(bins, -1)), - [length (sublist (xx, lambda ([x], last(bins)[1] <= x and x <= last(bins)[2])))]), + append ([length (sublist (xx, lambda ([x], first(bins)[1] <= x and x <= first(bins)[2])))], + map (lambda ([pq], length (sublist (xx, lambda ([x], pq[1] < x and x <= pq[2])))), rest(bins))), set_random_state (make_random_state (1)), 0); 0; commit dc8ef8d3164800fbb050c28780d16af634952c17 Author: Robert Dodier <rob...@us...> Date: Tue Jun 28 16:08:08 2016 -0700 In continuous_freq, count data falling into bins by sorting the data and then finding the indices of the bin boundaries in the sorted data. It is anticipated that this method is faster than the previous. diff --git a/share/descriptive/descriptive.mac b/share/descriptive/descriptive.mac index d6e3343..e48273f 100644 --- a/share/descriptive/descriptive.mac +++ b/share/descriptive/descriptive.mac @@ -244,7 +244,7 @@ build_sample(tbl) := /* classes we want, OR a list containing only the limits. */ /* If sample values are all equal, this function returns only */ /* one class of amplitude 2 */ -continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index], +continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins], if length(opt) = 1 and listofnumbersp(opt[1]) then ( mini: opt[1][1], maxi: opt[1][2], @@ -264,14 +264,51 @@ continuous_freq(lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index], then [[lis[1]-1,lis[1]+1],[length(lis)]] else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim), fr:makelist(0,i,1,nc), - for i:1 thru length(lis) do ( - if lis[i]=mini - then index: 1 - elseif lis[i] = maxi then index : nc - else index: ceiling((lis[i]-mini)/amp), - fr[index]:fr[index]+1 ), + bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1), + count_em_up (lis, bins, fr), [lim,fr]) )$ +count_em_up (xx, bins, counts) := + (xx : sort (xx), + for k thru length (bins) + do block ([i_first : find_index_first (xx, bins[k][1], ">="), + i_last : find_index_last (xx, bins[k][2], + if k < length(bins) and bins[k + 1][1] = bins[k][2] then "<" else "<=")], + counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1)); + +/* assume xx is sorted; find least i s.t. xx[i] > x or xx[i] >= x */ + +find_index_first (xx, x, comparison) := + if comparison(last(xx), x) + then if comparison(first(xx), x) + then 1 + else find_index_first_1 (xx, x, 1, length(xx), comparison); + +find_index_first_1 (xx, x, i0, i1, comparison) := + if i1 - i0 <= 1 then i1 + else + block ([i : floor (i0 + (i1 - i0) / 2)], + if comparison(xx[i], x) + then find_index_first_1 (xx, x, i0, i, comparison) + else find_index_first_1 (xx, x, i, i1, comparison)); + +/* assume xx is sorted; find greatest i s.t. xx[i] < x or xx[i] <= x */ + +find_index_last (xx, x, comparison) := + if comparison(first(xx), x) + then if comparison(last(xx), x) + then length(xx) + else find_index_last_1 (xx, x, 1, length(xx), comparison); + +find_index_last_1 (xx, x, i0, i1, comparison) := + if i1 - i0 <= 1 then i0 + else + block ([i : floor (i0 + (i1 - i0) / 2)], + if comparison(xx[i], x) + then find_index_last_1 (xx, x, i, i1, comparison) + else find_index_last_1 (xx, x, i0, i, comparison)); + +compile (count_em_up, find_index_first, find_index_first_1, find_index_last, find_index_last_1); /* Counts the frequency of each element in 'lis', its elements */ diff --git a/share/descriptive/rtest_descriptive.mac b/share/descriptive/rtest_descriptive.mac index e2c2aa8..57ebcc2 100644 --- a/share/descriptive/rtest_descriptive.mac +++ b/share/descriptive/rtest_descriptive.mac @@ -243,6 +243,50 @@ continuous_freq (s1, [-2,12]); [[-2,-3/5,4/5,11/5,18/5,5,32/5,39/5,46/5,53/5,12],[0,8,20,12,18,9,8,25, 0,0]]$ +(test_continuous_freq (xx, foo) := block ([output, bins, counts], + output : continuous_freq (xx, foo), + bins : makelist ([output[1][i], output[1][i + 1]], i, 1, length(output[1]) - 1), + counts : count_via_sublist (xx, bins), + if counts = output[2] + then true + else ['data = xx, 'continuous_freq = output, 'count_via_sublist = counts]), + count_via_sublist (xx, bins) := + append (map (lambda ([pq], length (sublist (xx, lambda ([x], pq[1] <= x and x < pq[2])))), rest(bins, -1)), + [length (sublist (xx, lambda ([x], last(bins)[1] <= x and x <= last(bins)[2])))]), + set_random_state (make_random_state (1)), + 0); +0; + +test_continuous_freq (makelist (random (100), 100) * 0.25, 17); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, 100); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, 1); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, [0, 25]); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, [0, 25, 2]); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, [0, 25, 20]); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, [-1000, 1000]); +true; + +test_continuous_freq (makelist (random (100), 100) * 0.25, [-990, 1010]); +true; + +test_continuous_freq (makelist (random (10), 100), 10); +true; + +test_continuous_freq (append (makelist (random (10), 50), 50 + makelist (random (10), 50)), 10); +true; + discrete_freq (s1); [[0,1,2,3,4,5,6,7,8,9],[8,8,12,12,10,8,9,8,12,13]]$ ----------------------------------------------------------------------- Summary of changes: share/descriptive/descriptive.mac | 113 ++++++++++++++++++++----------- share/descriptive/rtest_descriptive.mac | 88 ++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 40 deletions(-) hooks/post-receive -- Maxima CAS |