|
From: Nirmal S. <sha...@gm...> - 2014-02-01 19:40:28
|
Hi Mason,
This is the actual query that i was running.
select coalesce(fgpc.date_id,fgcd.date_id)
date_id,
fgpc.m_ad_grp_pub_key m_ad_grp_pub_key,
fgpc.m_kw_pub_key m_kw_pub_key,
kws.expr_names,
kws.expr_values,
kws.m_ad_grp_semid,
sum(fgpc.m_imps) m_imps,
sum(fgpc.m_clicks) m_clicks,
sum(fgpc.m_cost) m_cost,
sum(fgpc.m_conv_1pc) m_conv_1pc,
sum(fgpc.m_conv_mpc) m_conv_mpc,
avg(fgpc.m_cnv_rate_1pc) m_cnv_rate_1pc,
avg(fgpc.m_cnv_rate_mpc) m_cnv_rate_mpc,
avg(fgpc.m_avg_cpc) m_avg_cpc,
avg(fgpc.m_max_cpc) m_max_cpc,
avg(fgpc.m_firstpage_cpc) m_firstpage_cpc,
avg(fgpc.m_topofpage_cpc) m_topofpage_cpc,
avg(fgpc.m_avg_cpm) m_avg_cpm,
avg(fgpc.m_max_cpm) m_max_cpm,
avg(fgpc.m_max_cpa_pct) m_max_cpa_pct,
avg(fgpc.m_avg_pos) m_avg_pos,
avg(fgpc.m_lowest_pos) m_lowest_pos,
avg(fgpc.m_highest_pos) m_highest_pos,
avg(fgpc.m_quality_score) m_quality_score,
avg(fgpc.m_view_thru_conv)
m_view_thru_conv,
sum(fgcd.m_revenue) m_revenue,
sum(fgcd.m_conversions) m_conversions,
sum(coalesce(kws.m_new_kw_bid,
kws.m_kw_bid)) m_kw_total_bid,
max(coalesce(kws.m_new_kw_bid,
kws.m_kw_bid)) m_kw_max_bid,
min(coalesce(kws.m_new_kw_bid,
kws.m_kw_bid)) m_kw_min_bid
from
bidw.fact_msn_kw_perf_daily fgpc
full outer join bidw.fact_msn_kw_conversion_daily fgcd on
fgpc.m_ad_grp_pub_key = fgcd.m_ad_grp_pub_key and fgpc.m_kw_pub_key =
fgcd.m_kw_pub_key and fgpc.date_id = fgpc.date_id
join biods.msn_keyword_sup kws on
fgpc.m_kw_pub_key = kws.m_kw_pub_key and fgpc.m_ad_grp_pub_key =
kws.m_ad_grp_pub_key
where
coalesce(fgpc.date_id,fgcd.date_id) between
20131201 and 20140119
group by
coalesce(fgpc.date_id,fgcd.date_id),fgpc.m_ad_grp_pub_key,fgpc.m_kw_pub_key,kws.expr_names,kws.expr_values,kws.m_ad_grp_semid
;
*This is the explain plan for the same.*
explain analyze verbose select ......
....
* Data Node Scan on "__REMOTE_GROUP_QUERY__" (cost=0.00..2.50 rows=1000
width=908) (actual time=1672.149..8281.918 rows=605575 loops=1)*
Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)),
fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names, kws.expr_values,
kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)),
(sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)),
(avg(fgpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_ra
te_mpc)), (avg(fgpc.m_avg_cpc)), (avg(fgpc.m_max_cpc)),
(avg(fgpc.m_firstpage_cpc)), (avg(fgpc.m_topofpage_cpc)),
(avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)), (avg(fgpc.m_max_cpa_pct)),
(avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowest_pos)), (avg(fgpc.m_highest_pos)),
(avg(fgpc.m_quality_score)), (avg(fgpc.m_view_thr
u_conv)), (sum(fgcd.m_revenue)), (sum(fgcd.m_conversions)),
(sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))),
(min(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid)))
*Node/s: d11, d12, d13, d14, d15, d16*
Remote query: SELECT COALESCE((l.a_1)::bigint, l.a_23), l.a_2, l.a_3,
r.a_1, r.a_2, r.a_3, sum(l.a_4), sum(l.a_5), sum(l.a_6), sum(l.a_7),
sum(l.a_8), pg_catalog.numeric_avg(avg(l.a_9)),
pg_catalog.numeric_avg(avg(l.a_10)), pg_catalog.numeric_avg(avg(l.a_11)),
pg_catalog.numeric_avg(avg(l.a_12)), pg_catalog.
numeric_avg(avg(l.a_13)), pg_catalog.numeric_avg(avg(l.a_14)),
pg_catalog.numeric_avg(avg(l.a_15)), pg_catalog.numeric_avg(avg(l.a_16)),
pg_catalog.numeric_avg(avg(l.a_17)), pg_catalog.numeric_avg(avg(l.a_18)),
pg_catalog.numeric_avg(avg(l.a_19)), pg_catalog.numeric_avg(avg(l.a_20)),
pg_catalog.numeric_avg(avg(
l.a_21)), pg_catalog.numeric_avg(avg(l.a_22)), sum(l.a_24), sum(l.a_25),
sum(COALESCE(r.a_4, r.a_5)), max(COALESCE(r.a_4, r.a_5)),
min(COALESCE(r.a_4, r.a_5)) FROM ((SELECT l.a_1, l.a_2, l.a_3, l.a_4,
l.a_5, l.a_6, l.a_7, l.a_8, l.a_9, l.a_10, l.a_11, l.a_12, l.a_13, l.a_14,
l.a_15, l.a_16, l.a_17, l.a_18, l.a_
19, l.a_20, l.a_21, l.a_22, r.a_1, r.a_2, r.a_3 FROM ((SELECT fgpc.date_id,
fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, fgpc.m_imps, fgpc.m_clicks,
fgpc.m_cost, fgpc.m_conv_1pc, fgpc.m_conv_mpc, fgpc.m_cnv_rate_1pc,
fgpc.m_cnv_rate_mpc, fgpc.m_avg_cpc, fgpc.m_max_cpc, fgpc.m_firstpage_cpc,
fgpc.m_topofpage_cpc, f
gpc.m_avg_cpm, fgpc.m_max_cpm, fgpc.m_max_cpa_pct, fgpc.m_avg_pos,
fgpc.m_lowest_pos, fgpc.m_highest_pos, fgpc.m_quality_score,
fgpc.m_view_thru_conv FROM ONLY bidw.fact_msn_kw_perf_daily fgpc WHERE
true) l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12,
a_13, a_14, a_15, a_16, a_17, a_18, a_19,
a_20, a_21, a_22) LEFT JOIN (SELECT fgcd.date_id, fgcd.m_revenue,
fgcd.m_conversions, fgcd.m_ad_grp_pub_key, fgcd.m_kw_pub_key FROM ONLY
bidw.fact_msn_kw_conversion_daily fgcd WHERE true) r(a_1, a_2, a_3, a_4,
a_5) ON (((l.a_1 = l.a_1) AND (l.a_2 = r.a_4) AND (l.a_3 = r.a_5)))) WHERE
((COALESCE((l.a_1)::bigint,
r.a_1) >= 20131201) AND (COALESCE((l.a_1)::bigint, r.a_1) <= 20140119)))
l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13,
a_14, a_15, a_16, a_17, a_18, a_19, a_20, a_21, a_22, a_23, a_24, a_25)
JOIN (SELECT kws.expr_names, kws.expr_values, kws.m_ad_grp_semid,
kws.m_new_kw_bid, kws.m_kw_bi
d, kws.m_kw_pub_key, kws.m_ad_grp_pub_key FROM ONLY biods.msn_keyword_sup
kws WHERE true) r(a_1, a_2, a_3, a_4, a_5, a_6, a_7) ON (true)) WHERE
((l.a_2 = r.a_7) AND (l.a_3 = r.a_6)) GROUP BY 1, 2, 3, 4, 5, 6
* Total runtime: 8378.080 ms*
(5 rows)
*This is the actual time taken by the query:*
[postgres@sv4-pgxc-db04 test]$ time psql -d mydb -f "test.sql" > a.out
*real 0m23.533s*
user 0m15.705s
sys 0m0.748s
Now i dont know why is it taking that much time.
Nirmal
On Sat, Feb 1, 2014 at 11:16 AM, Mason Sharp <ms...@tr...>wrote:
>
>
>
> On Sat, Feb 1, 2014 at 2:13 PM, Nirmal Sharma <sha...@gm...>wrote:
>
>> My query uses aggregates and joins and it looks like this :
>>
>> Select
>> Sum(...),
>> Sum(..),
>> Avg(..),
>> ...
>> ....
>> ..
>> From tableA a inner join tableB on a.col1 =b.col1
>> Inner join tableC on a.col1=c.col1
>>
>> All the 3 tables are distributed on hash(col1) .
>>
>> I have 1 coordinator , 6 nodes, 1 GTM.
>>
>> When I run this query , it takes total 23 sec.
>> But when I run the same query on each and individual nodes then it takes
>> 4 sec on each and every nodes.
>> So since it's cluster , it should ideally take 4 sec + some overhead time
>> to combine data from each node on coordinator ( max 2 more sec) but I don't
>> understand why it is taking 23 sec when runs from coordinator.
>>
>
>
> Can you please add an EXPLAIN in from of your SELECT to look at the plan?
> If you only use 2 tables instead of 3, does it behave more as expected?
>
>
|