|
From: Nirmal S. <sha...@gm...> - 2014-02-01 23:27:46
|
Hi, These are the timings for adding limit with different amount. With these timings you can see see that here the bottleneck is coordinator (i.e. retrieving data from various nodes to coordinator ). I just want to ask whether its normal or not? ---for limit 1000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m1.935s user 0m0.051s sys 0m0.002s ---for limit 10000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m2.724s user 0m0.481s sys 0m0.023s --for limit 100000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m12.102s user 0m3.139s sys 0m0.146s --for limit 200000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m13.078s user 0m5.507s sys 0m0.316s ---for limit 400000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m18.820s user 0m10.482s sys 0m0.659s ---for limit 600000 [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out real 0m23.478s user 0m15.631s sys 0m0.940s [postgres@sv4-pgxc-db04 test]$ I will also enable the statement log and try again and will send the output soon. Nirmal On Sat, Feb 1, 2014 at 1:47 PM, Mason Sharp <ms...@tr...> wrote: > > > > On Sat, Feb 1, 2014 at 2:40 PM, Nirmal Sharma <sha...@gm...>wrote: > >> Hi Mason, >> >> This is the actual query that i was running. >> >> select coalesce(fgpc.date_id,fgcd.date_id) >> date_id, >> fgpc.m_ad_grp_pub_key m_ad_grp_pub_key, >> fgpc.m_kw_pub_key m_kw_pub_key, >> kws.expr_names, >> kws.expr_values, >> kws.m_ad_grp_semid, >> sum(fgpc.m_imps) m_imps, >> sum(fgpc.m_clicks) m_clicks, >> sum(fgpc.m_cost) m_cost, >> >> sum(fgpc.m_conv_1pc) m_conv_1pc, >> sum(fgpc.m_conv_mpc) m_conv_mpc, >> avg(fgpc.m_cnv_rate_1pc) m_cnv_rate_1pc, >> avg(fgpc.m_cnv_rate_mpc) m_cnv_rate_mpc, >> avg(fgpc.m_avg_cpc) m_avg_cpc, >> avg(fgpc.m_max_cpc) m_max_cpc, >> avg(fgpc.m_firstpage_cpc) >> m_firstpage_cpc, >> avg(fgpc.m_topofpage_cpc) >> m_topofpage_cpc, >> avg(fgpc.m_avg_cpm) m_avg_cpm, >> avg(fgpc.m_max_cpm) m_max_cpm, >> avg(fgpc.m_max_cpa_pct) m_max_cpa_pct, >> avg(fgpc.m_avg_pos) m_avg_pos, >> avg(fgpc.m_lowest_pos) m_lowest_pos, >> avg(fgpc.m_highest_pos) m_highest_pos, >> avg(fgpc.m_quality_score) >> m_quality_score, >> avg(fgpc.m_view_thru_conv) >> m_view_thru_conv, >> sum(fgcd.m_revenue) m_revenue, >> sum(fgcd.m_conversions) m_conversions, >> sum(coalesce(kws.m_new_kw_bid, >> kws.m_kw_bid)) m_kw_total_bid, >> max(coalesce(kws.m_new_kw_bid, >> kws.m_kw_bid)) m_kw_max_bid, >> min(coalesce(kws.m_new_kw_bid, >> kws.m_kw_bid)) m_kw_min_bid >> from >> bidw.fact_msn_kw_perf_daily fgpc >> full outer join bidw.fact_msn_kw_conversion_daily fgcd on >> fgpc.m_ad_grp_pub_key = fgcd.m_ad_grp_pub_key and fgpc.m_kw_pub_key = >> fgcd.m_kw_pub_key and fgpc.date_id = fgpc.date_id >> join biods.msn_keyword_sup kws on >> fgpc.m_kw_pub_key = kws.m_kw_pub_key and fgpc.m_ad_grp_pub_key = >> kws.m_ad_grp_pub_key >> where >> coalesce(fgpc.date_id,fgcd.date_id) between >> 20131201 and 20140119 >> group by >> >> coalesce(fgpc.date_id,fgcd.date_id),fgpc.m_ad_grp_pub_key,fgpc.m_kw_pub_key,kws.expr_names,kws.expr_values,kws.m_ad_grp_semid >> ; >> >> *This is the explain plan for the same.* >> explain analyze verbose select ...... >> .... >> >> * Data Node Scan on "__REMOTE_GROUP_QUERY__" (cost=0.00..2.50 rows=1000 >> width=908) (actual time=1672.149..8281.918 rows=605575 loops=1)* >> Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)), >> fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names, kws.expr_values, >> kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)), >> (sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)), >> (avg(fgpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_ra >> te_mpc)), (avg(fgpc.m_avg_cpc)), (avg(fgpc.m_max_cpc)), >> (avg(fgpc.m_firstpage_cpc)), (avg(fgpc.m_topofpage_cpc)), >> (avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)), (avg(fgpc.m_max_cpa_pct)), >> (avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowest_pos)), (avg(fgpc.m_highest_pos)), >> (avg(fgpc.m_quality_score)), (avg(fgpc.m_view_thr >> u_conv)), (sum(fgcd.m_revenue)), (sum(fgcd.m_conversions)), >> (sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))), >> (max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))), >> (min(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))) >> *Node/s: d11, d12, d13, d14, d15, d16* >> Remote query: SELECT COALESCE((l.a_1)::bigint, l.a_23), l.a_2, l.a_3, >> r.a_1, r.a_2, r.a_3, sum(l.a_4), sum(l.a_5), sum(l.a_6), sum(l.a_7), >> sum(l.a_8), pg_catalog.numeric_avg(avg(l.a_9)), >> pg_catalog.numeric_avg(avg(l.a_10)), pg_catalog.numeric_avg(avg(l.a_11)), >> pg_catalog.numeric_avg(avg(l.a_12)), pg_catalog. >> numeric_avg(avg(l.a_13)), pg_catalog.numeric_avg(avg(l.a_14)), >> pg_catalog.numeric_avg(avg(l.a_15)), pg_catalog.numeric_avg(avg(l.a_16)), >> pg_catalog.numeric_avg(avg(l.a_17)), pg_catalog.numeric_avg(avg(l.a_18)), >> pg_catalog.numeric_avg(avg(l.a_19)), pg_catalog.numeric_avg(avg(l.a_20)), >> pg_catalog.numeric_avg(avg( >> l.a_21)), pg_catalog.numeric_avg(avg(l.a_22)), sum(l.a_24), sum(l.a_25), >> sum(COALESCE(r.a_4, r.a_5)), max(COALESCE(r.a_4, r.a_5)), >> min(COALESCE(r.a_4, r.a_5)) FROM ((SELECT l.a_1, l.a_2, l.a_3, l.a_4, >> l.a_5, l.a_6, l.a_7, l.a_8, l.a_9, l.a_10, l.a_11, l.a_12, l.a_13, l.a_14, >> l.a_15, l.a_16, l.a_17, l.a_18, l.a_ >> 19, l.a_20, l.a_21, l.a_22, r.a_1, r.a_2, r.a_3 FROM ((SELECT >> fgpc.date_id, fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, fgpc.m_imps, >> fgpc.m_clicks, fgpc.m_cost, fgpc.m_conv_1pc, fgpc.m_conv_mpc, >> fgpc.m_cnv_rate_1pc, fgpc.m_cnv_rate_mpc, fgpc.m_avg_cpc, fgpc.m_max_cpc, >> fgpc.m_firstpage_cpc, fgpc.m_topofpage_cpc, f >> gpc.m_avg_cpm, fgpc.m_max_cpm, fgpc.m_max_cpa_pct, fgpc.m_avg_pos, >> fgpc.m_lowest_pos, fgpc.m_highest_pos, fgpc.m_quality_score, >> fgpc.m_view_thru_conv FROM ONLY bidw.fact_msn_kw_perf_daily fgpc WHERE >> true) l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, >> a_13, a_14, a_15, a_16, a_17, a_18, a_19, >> a_20, a_21, a_22) LEFT JOIN (SELECT fgcd.date_id, fgcd.m_revenue, >> fgcd.m_conversions, fgcd.m_ad_grp_pub_key, fgcd.m_kw_pub_key FROM ONLY >> bidw.fact_msn_kw_conversion_daily fgcd WHERE true) r(a_1, a_2, a_3, a_4, >> a_5) ON (((l.a_1 = l.a_1) AND (l.a_2 = r.a_4) AND (l.a_3 = r.a_5)))) WHERE >> ((COALESCE((l.a_1)::bigint, >> r.a_1) >= 20131201) AND (COALESCE((l.a_1)::bigint, r.a_1) <= 20140119))) >> l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13, >> a_14, a_15, a_16, a_17, a_18, a_19, a_20, a_21, a_22, a_23, a_24, a_25) >> JOIN (SELECT kws.expr_names, kws.expr_values, kws.m_ad_grp_semid, >> kws.m_new_kw_bid, kws.m_kw_bi >> d, kws.m_kw_pub_key, kws.m_ad_grp_pub_key FROM ONLY biods.msn_keyword_sup >> kws WHERE true) r(a_1, a_2, a_3, a_4, a_5, a_6, a_7) ON (true)) WHERE >> ((l.a_2 = r.a_7) AND (l.a_3 = r.a_6)) GROUP BY 1, 2, 3, 4, 5, 6 >> * Total runtime: 8378.080 ms* >> (5 rows) >> >> >> >> *This is the actual time taken by the query:* >> >> [postgres@sv4-pgxc-db04 test]$ time psql -d mydb -f "test.sql" > a.out >> >> *real 0m23.533s* >> user 0m15.705s >> sys 0m0.748s >> >> Now i dont know why is it taking that much time. >> > > Try adding LIMIT with different amounts for example to see how that > impacts time. > > Also, try enabling statement logging (log_statement = all in > postgresql.conf) on the data nodes to see how long it takes on each node. > > Also, the statement was rewritten in XC, with relations converted into > SELECTs, so try running the rewritten version directly to see how long it > takes. > > Thanks, > > Mason > > |