|
From: Nirmal S. <sha...@gm...> - 2014-02-02 05:49:44
|
This is the explain plan for the query with limit 10000.
Limit (cost=0.00..2.50 rows=1000 width=908) (actual
time=1586.926..1836.081 rows=10000 loops=1)
Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)),
fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names, kws.expr_values,
kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)),
(sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)), (avg(f
gpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_rate_mpc)), (avg(fgpc.m_avg_cpc)),
(avg(fgpc.m_max_cpc)), (avg(fgpc.m_firstpage_cpc)),
(avg(fgpc.m_topofpage_cpc)), (avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)),
(avg(fgpc.m_max_cpa_pct)), (avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowest_pos
)), (avg(fgpc.m_highest_pos)), (avg(fgpc.m_quality_score)),
(avg(fgpc.m_view_thru_conv)), (sum(fgcd.m_revenue)),
(sum(fgcd.m_conversions)), (sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(min(COALESCE(kws.m_new_kw_bid, kw
s.m_kw_bid)))
-> Data Node Scan on "__REMOTE_LIMIT_QUERY__" (cost=0.00..2.50
rows=1000 width=908) (actual time=1586.924..1834.118 rows=10000 loops=1)
Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)),
fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names, kws.expr_values,
kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)),
(sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)),
(avg(fgpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_rate_mpc)),
(avg(fgpc.m_avg_cpc)), (avg(fgpc.m_max_cpc)), (avg(fgpc.m_firstpage_cpc)),
(avg(fgpc.m_topofpage_cpc)), (avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)),
(avg(fgpc.m_max_cpa_pct)), (avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowe
st_pos)), (avg(fgpc.m_highest_pos)), (avg(fgpc.m_quality_score)),
(avg(fgpc.m_view_thru_conv)), (sum(fgcd.m_revenue)),
(sum(fgcd.m_conversions)), (sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(min(COALESCE(kws.m_new_kw_b
id, kws.m_kw_bid)))
Node/s: d11, d12, d13, d14, d15, d16
Remote query: SELECT COALESCE((l.a_1)::bigint, l.a_23), l.a_2,
l.a_3, r.a_1, r.a_2, r.a_3, sum(l.a_4), sum(l.a_5), sum(l.a_6), sum(l.a_7),
sum(l.a_8), pg_catalog.numeric_avg(avg(l.a_9)),
pg_catalog.numeric_avg(avg(l.a_10)), pg_catalog.numeric_avg(avg(l.a_11)), pg
_catalog.numeric_avg(avg(l.a_12)), pg_catalog.numeric_avg(avg(l.a_13)),
pg_catalog.numeric_avg(avg(l.a_14)), pg_catalog.numeric_avg(avg(l.a_15)),
pg_catalog.numeric_avg(avg(l.a_16)), pg_catalog.numeric_avg(avg(l.a_17)),
pg_catalog.numeric_avg(avg(l.a_18)), pg_catalog.nume
ric_avg(avg(l.a_19)), pg_catalog.numeric_avg(avg(l.a_20)),
pg_catalog.numeric_avg(avg(l.a_21)), pg_catalog.numeric_avg(avg(l.a_22)),
sum(l.a_24), sum(l.a_25), sum(COALESCE(r.a_4, r.a_5)), max(COALESCE(r.a_4,
r.a_5)), min(COALESCE(r.a_4, r.a_5)) FROM ((SELECT l.a_1, l.a_2,
l.a_3, l.a_4, l.a_5, l.a_6, l.a_7, l.a_8, l.a_9, l.a_10, l.a_11, l.a_12,
l.a_13, l.a_14, l.a_15, l.a_16, l.a_17, l.a_18, l.a_19, l.a_20, l.a_21,
l.a_22, r.a_1, r.a_2, r.a_3 FROM ((SELECT fgpc.date_id,
fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, fgpc.m_imps, fgpc.m_clicks,
fgpc.m_cost, fgpc.m_conv_1pc, fgpc.m_conv_mpc, fgpc.m_cnv_rate_1pc,
fgpc.m_cnv_rate_mpc, fgpc.m_avg_cpc, fgpc.m_max_cpc, fgpc.m_firstpage_cpc,
fgpc.m_topofpage_cpc, fgpc.m_avg_cpm, fgpc.m_max_cpm, fgpc.m_max_cpa_pct,
fgpc.m_avg_pos, fgpc.m_lowest_pos, fgpc.m_highest_pos,
fgpc.m_quality_score, fgpc.m_view_thru_conv FROM ONLY
bidw.fact_msn_kw_perf_daily fgpc WHERE true) l(a_1, a_2, a_3, a_4, a_5,
a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13, a_14, a_15, a_16, a_17, a_18,
a_19, a_20, a_21, a_22) LEFT JOIN (SELECT fgcd.date_id, fgcd.m_revenue,
fgcd.m_conversions, fgcd.m_ad_grp_pub_key, fgcd.m_kw_pub_key FROM ONLY
bidw.fact_msn_kw_conversion_daily fgcd WHERE true) r(a_1, a_2, a_3, a_4,
a_5) ON (((l.a_1 = l.a_1) AND (l.a_2 = r.a_4) AND (l.a_3 = r.a_5)))) WHERE
((COALESCE((l.a_1)::bigint, r.a_1) >= 20131201) AND (
COALESCE((l.a_1)::bigint, r.a_1) <= 20140119))) l(a_1, a_2, a_3, a_4, a_5,
a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13, a_14, a_15, a_16, a_17, a_18,
a_19, a_20, a_21, a_22, a_23, a_24, a_25) JOIN (SELECT kws.expr_names,
kws.expr_values, kws.m_ad_grp_semid, kws.m_new_kw_bi
d, kws.m_kw_bid, kws.m_kw_pub_key, kws.m_ad_grp_pub_key FROM ONLY
biods.msn_keyword_sup kws WHERE true) r(a_1, a_2, a_3, a_4, a_5, a_6, a_7)
ON (true)) WHERE ((l.a_2 = r.a_7) AND (l.a_3 = r.a_6)) GROUP BY 1, 2, 3, 4,
5, 6 LIMIT 10000::bigint
Total runtime: 2194.762 ms
(7 rows)
On Sat, Feb 1, 2014 at 7:44 PM, Koichi Suzuki <koi...@gm...> wrote:
> Could you share "explain" result to see how plan works fine.
>
> Regards;
> ---
> Koichi Suzuki
>
>
> 2014-02-02 Nirmal Sharma <sha...@gm...>:
> > Hi,
> >
> > These are the timings for adding limit with different amount.
> > With these timings you can see see that here the bottleneck is
> coordinator
> > (i.e. retrieving data from various nodes to coordinator ).
> > I just want to ask whether its normal or not?
> >
> > ---for limit 1000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m1.935s
> > user 0m0.051s
> > sys 0m0.002s
> >
> > ---for limit 10000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m2.724s
> > user 0m0.481s
> > sys 0m0.023s
> >
> > --for limit 100000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m12.102s
> > user 0m3.139s
> > sys 0m0.146s
> >
> > --for limit 200000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m13.078s
> > user 0m5.507s
> > sys 0m0.316s
> >
> > ---for limit 400000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m18.820s
> > user 0m10.482s
> > sys 0m0.659s
> >
> > ---for limit 600000
> > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out
> >
> > real 0m23.478s
> > user 0m15.631s
> > sys 0m0.940s
> > [postgres@sv4-pgxc-db04 test]$
> >
> >
> > I will also enable the statement log and try again and will send the
> output
> > soon.
> >
> > Nirmal
> >
> >
> > On Sat, Feb 1, 2014 at 1:47 PM, Mason Sharp <ms...@tr...>
> wrote:
> >>
> >>
> >>
> >>
> >> On Sat, Feb 1, 2014 at 2:40 PM, Nirmal Sharma <sha...@gm...>
> >> wrote:
> >>>
> >>> Hi Mason,
> >>>
> >>> This is the actual query that i was running.
> >>>
> >>> select coalesce(fgpc.date_id,fgcd.date_id)
> >>> date_id,
> >>> fgpc.m_ad_grp_pub_key
> m_ad_grp_pub_key,
> >>> fgpc.m_kw_pub_key m_kw_pub_key,
> >>> kws.expr_names,
> >>> kws.expr_values,
> >>> kws.m_ad_grp_semid,
> >>> sum(fgpc.m_imps) m_imps,
> >>> sum(fgpc.m_clicks) m_clicks,
> >>> sum(fgpc.m_cost) m_cost,
> >>> sum(fgpc.m_conv_1pc) m_conv_1pc,
> >>> sum(fgpc.m_conv_mpc) m_conv_mpc,
> >>> avg(fgpc.m_cnv_rate_1pc)
> m_cnv_rate_1pc,
> >>> avg(fgpc.m_cnv_rate_mpc)
> m_cnv_rate_mpc,
> >>> avg(fgpc.m_avg_cpc) m_avg_cpc,
> >>> avg(fgpc.m_max_cpc) m_max_cpc,
> >>> avg(fgpc.m_firstpage_cpc)
> >>> m_firstpage_cpc,
> >>> avg(fgpc.m_topofpage_cpc)
> >>> m_topofpage_cpc,
> >>> avg(fgpc.m_avg_cpm) m_avg_cpm,
> >>> avg(fgpc.m_max_cpm) m_max_cpm,
> >>> avg(fgpc.m_max_cpa_pct) m_max_cpa_pct,
> >>> avg(fgpc.m_avg_pos) m_avg_pos,
> >>> avg(fgpc.m_lowest_pos) m_lowest_pos,
> >>> avg(fgpc.m_highest_pos) m_highest_pos,
> >>> avg(fgpc.m_quality_score)
> >>> m_quality_score,
> >>> avg(fgpc.m_view_thru_conv)
> >>> m_view_thru_conv,
> >>> sum(fgcd.m_revenue) m_revenue,
> >>> sum(fgcd.m_conversions) m_conversions,
> >>> sum(coalesce(kws.m_new_kw_bid,
> >>> kws.m_kw_bid)) m_kw_total_bid,
> >>> max(coalesce(kws.m_new_kw_bid,
> >>> kws.m_kw_bid)) m_kw_max_bid,
> >>> min(coalesce(kws.m_new_kw_bid,
> >>> kws.m_kw_bid)) m_kw_min_bid
> >>> from
> >>> bidw.fact_msn_kw_perf_daily fgpc
> >>> full outer join bidw.fact_msn_kw_conversion_daily fgcd on
> >>> fgpc.m_ad_grp_pub_key = fgcd.m_ad_grp_pub_key and fgpc.m_kw_pub_key =
> >>> fgcd.m_kw_pub_key and fgpc.date_id = fgpc.date_id
> >>> join biods.msn_keyword_sup kws
> on
> >>> fgpc.m_kw_pub_key = kws.m_kw_pub_key and fgpc.m_ad_grp_pub_key =
> >>> kws.m_ad_grp_pub_key
> >>> where
> >>> coalesce(fgpc.date_id,fgcd.date_id) between
> >>> 20131201 and 20140119
> >>> group by
> >>>
> >>>
> coalesce(fgpc.date_id,fgcd.date_id),fgpc.m_ad_grp_pub_key,fgpc.m_kw_pub_key,kws.expr_names,kws.expr_values,kws.m_ad_grp_semid
> >>> ;
> >>>
> >>> This is the explain plan for the same.
> >>> explain analyze verbose select ......
> >>> ....
> >>>
> >>> Data Node Scan on "__REMOTE_GROUP_QUERY__" (cost=0.00..2.50 rows=1000
> >>> width=908) (actual time=1672.149..8281.918 rows=605575 loops=1)
> >>> Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)),
> >>> fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names,
> kws.expr_values,
> >>> kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)),
> >>> (sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)),
> >>> (avg(fgpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_ra
> >>> te_mpc)), (avg(fgpc.m_avg_cpc)), (avg(fgpc.m_max_cpc)),
> >>> (avg(fgpc.m_firstpage_cpc)), (avg(fgpc.m_topofpage_cpc)),
> >>> (avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)),
> (avg(fgpc.m_max_cpa_pct)),
> >>> (avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowest_pos)),
> (avg(fgpc.m_highest_pos)),
> >>> (avg(fgpc.m_quality_score)), (avg(fgpc.m_view_thr
> >>> u_conv)), (sum(fgcd.m_revenue)), (sum(fgcd.m_conversions)),
> >>> (sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
> >>> (max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
> >>> (min(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid)))
> >>> Node/s: d11, d12, d13, d14, d15, d16
> >>> Remote query: SELECT COALESCE((l.a_1)::bigint, l.a_23), l.a_2,
> l.a_3,
> >>> r.a_1, r.a_2, r.a_3, sum(l.a_4), sum(l.a_5), sum(l.a_6), sum(l.a_7),
> >>> sum(l.a_8), pg_catalog.numeric_avg(avg(l.a_9)),
> >>> pg_catalog.numeric_avg(avg(l.a_10)),
> pg_catalog.numeric_avg(avg(l.a_11)),
> >>> pg_catalog.numeric_avg(avg(l.a_12)), pg_catalog.
> >>> numeric_avg(avg(l.a_13)), pg_catalog.numeric_avg(avg(l.a_14)),
> >>> pg_catalog.numeric_avg(avg(l.a_15)),
> pg_catalog.numeric_avg(avg(l.a_16)),
> >>> pg_catalog.numeric_avg(avg(l.a_17)),
> pg_catalog.numeric_avg(avg(l.a_18)),
> >>> pg_catalog.numeric_avg(avg(l.a_19)),
> pg_catalog.numeric_avg(avg(l.a_20)),
> >>> pg_catalog.numeric_avg(avg(
> >>> l.a_21)), pg_catalog.numeric_avg(avg(l.a_22)), sum(l.a_24),
> sum(l.a_25),
> >>> sum(COALESCE(r.a_4, r.a_5)), max(COALESCE(r.a_4, r.a_5)),
> >>> min(COALESCE(r.a_4, r.a_5)) FROM ((SELECT l.a_1, l.a_2, l.a_3, l.a_4,
> l.a_5,
> >>> l.a_6, l.a_7, l.a_8, l.a_9, l.a_10, l.a_11, l.a_12, l.a_13, l.a_14,
> l.a_15,
> >>> l.a_16, l.a_17, l.a_18, l.a_
> >>> 19, l.a_20, l.a_21, l.a_22, r.a_1, r.a_2, r.a_3 FROM ((SELECT
> >>> fgpc.date_id, fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, fgpc.m_imps,
> >>> fgpc.m_clicks, fgpc.m_cost, fgpc.m_conv_1pc, fgpc.m_conv_mpc,
> >>> fgpc.m_cnv_rate_1pc, fgpc.m_cnv_rate_mpc, fgpc.m_avg_cpc,
> fgpc.m_max_cpc,
> >>> fgpc.m_firstpage_cpc, fgpc.m_topofpage_cpc, f
> >>> gpc.m_avg_cpm, fgpc.m_max_cpm, fgpc.m_max_cpa_pct, fgpc.m_avg_pos,
> >>> fgpc.m_lowest_pos, fgpc.m_highest_pos, fgpc.m_quality_score,
> >>> fgpc.m_view_thru_conv FROM ONLY bidw.fact_msn_kw_perf_daily fgpc WHERE
> true)
> >>> l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13,
> a_14,
> >>> a_15, a_16, a_17, a_18, a_19,
> >>> a_20, a_21, a_22) LEFT JOIN (SELECT fgcd.date_id, fgcd.m_revenue,
> >>> fgcd.m_conversions, fgcd.m_ad_grp_pub_key, fgcd.m_kw_pub_key FROM ONLY
> >>> bidw.fact_msn_kw_conversion_daily fgcd WHERE true) r(a_1, a_2, a_3,
> a_4,
> >>> a_5) ON (((l.a_1 = l.a_1) AND (l.a_2 = r.a_4) AND (l.a_3 = r.a_5))))
> WHERE
> >>> ((COALESCE((l.a_1)::bigint,
> >>> r.a_1) >= 20131201) AND (COALESCE((l.a_1)::bigint, r.a_1) <=
> 20140119)))
> >>> l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13,
> a_14,
> >>> a_15, a_16, a_17, a_18, a_19, a_20, a_21, a_22, a_23, a_24, a_25) JOIN
> >>> (SELECT kws.expr_names, kws.expr_values, kws.m_ad_grp_semid,
> >>> kws.m_new_kw_bid, kws.m_kw_bi
> >>> d, kws.m_kw_pub_key, kws.m_ad_grp_pub_key FROM ONLY
> biods.msn_keyword_sup
> >>> kws WHERE true) r(a_1, a_2, a_3, a_4, a_5, a_6, a_7) ON (true)) WHERE
> >>> ((l.a_2 = r.a_7) AND (l.a_3 = r.a_6)) GROUP BY 1, 2, 3, 4, 5, 6
> >>> Total runtime: 8378.080 ms
> >>> (5 rows)
> >>>
> >>>
> >>>
> >>> This is the actual time taken by the query:
> >>>
> >>> [postgres@sv4-pgxc-db04 test]$ time psql -d mydb -f "test.sql" > a.out
> >>>
> >>> real 0m23.533s
> >>> user 0m15.705s
> >>> sys 0m0.748s
> >>>
> >>> Now i dont know why is it taking that much time.
> >>
> >>
> >> Try adding LIMIT with different amounts for example to see how that
> >> impacts time.
> >>
> >> Also, try enabling statement logging (log_statement = all in
> >> postgresql.conf) on the data nodes to see how long it takes on each
> node.
> >>
> >> Also, the statement was rewritten in XC, with relations converted into
> >> SELECTs, so try running the rewritten version directly to see how long
> it
> >> takes.
> >>
> >> Thanks,
> >>
> >> Mason
> >>
> >
>
|