|
From: Koichi S. <koi...@gm...> - 2014-02-02 03:44:48
|
Could you share "explain" result to see how plan works fine. Regards; --- Koichi Suzuki 2014-02-02 Nirmal Sharma <sha...@gm...>: > Hi, > > These are the timings for adding limit with different amount. > With these timings you can see see that here the bottleneck is coordinator > (i.e. retrieving data from various nodes to coordinator ). > I just want to ask whether its normal or not? > > ---for limit 1000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m1.935s > user 0m0.051s > sys 0m0.002s > > ---for limit 10000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m2.724s > user 0m0.481s > sys 0m0.023s > > --for limit 100000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m12.102s > user 0m3.139s > sys 0m0.146s > > --for limit 200000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m13.078s > user 0m5.507s > sys 0m0.316s > > ---for limit 400000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m18.820s > user 0m10.482s > sys 0m0.659s > > ---for limit 600000 > [postgres@sv4-pgxc-db04 test]$ time psql -d myDB -f "test.sql" > a.out > > real 0m23.478s > user 0m15.631s > sys 0m0.940s > [postgres@sv4-pgxc-db04 test]$ > > > I will also enable the statement log and try again and will send the output > soon. > > Nirmal > > > On Sat, Feb 1, 2014 at 1:47 PM, Mason Sharp <ms...@tr...> wrote: >> >> >> >> >> On Sat, Feb 1, 2014 at 2:40 PM, Nirmal Sharma <sha...@gm...> >> wrote: >>> >>> Hi Mason, >>> >>> This is the actual query that i was running. >>> >>> select coalesce(fgpc.date_id,fgcd.date_id) >>> date_id, >>> fgpc.m_ad_grp_pub_key m_ad_grp_pub_key, >>> fgpc.m_kw_pub_key m_kw_pub_key, >>> kws.expr_names, >>> kws.expr_values, >>> kws.m_ad_grp_semid, >>> sum(fgpc.m_imps) m_imps, >>> sum(fgpc.m_clicks) m_clicks, >>> sum(fgpc.m_cost) m_cost, >>> sum(fgpc.m_conv_1pc) m_conv_1pc, >>> sum(fgpc.m_conv_mpc) m_conv_mpc, >>> avg(fgpc.m_cnv_rate_1pc) m_cnv_rate_1pc, >>> avg(fgpc.m_cnv_rate_mpc) m_cnv_rate_mpc, >>> avg(fgpc.m_avg_cpc) m_avg_cpc, >>> avg(fgpc.m_max_cpc) m_max_cpc, >>> avg(fgpc.m_firstpage_cpc) >>> m_firstpage_cpc, >>> avg(fgpc.m_topofpage_cpc) >>> m_topofpage_cpc, >>> avg(fgpc.m_avg_cpm) m_avg_cpm, >>> avg(fgpc.m_max_cpm) m_max_cpm, >>> avg(fgpc.m_max_cpa_pct) m_max_cpa_pct, >>> avg(fgpc.m_avg_pos) m_avg_pos, >>> avg(fgpc.m_lowest_pos) m_lowest_pos, >>> avg(fgpc.m_highest_pos) m_highest_pos, >>> avg(fgpc.m_quality_score) >>> m_quality_score, >>> avg(fgpc.m_view_thru_conv) >>> m_view_thru_conv, >>> sum(fgcd.m_revenue) m_revenue, >>> sum(fgcd.m_conversions) m_conversions, >>> sum(coalesce(kws.m_new_kw_bid, >>> kws.m_kw_bid)) m_kw_total_bid, >>> max(coalesce(kws.m_new_kw_bid, >>> kws.m_kw_bid)) m_kw_max_bid, >>> min(coalesce(kws.m_new_kw_bid, >>> kws.m_kw_bid)) m_kw_min_bid >>> from >>> bidw.fact_msn_kw_perf_daily fgpc >>> full outer join bidw.fact_msn_kw_conversion_daily fgcd on >>> fgpc.m_ad_grp_pub_key = fgcd.m_ad_grp_pub_key and fgpc.m_kw_pub_key = >>> fgcd.m_kw_pub_key and fgpc.date_id = fgpc.date_id >>> join biods.msn_keyword_sup kws on >>> fgpc.m_kw_pub_key = kws.m_kw_pub_key and fgpc.m_ad_grp_pub_key = >>> kws.m_ad_grp_pub_key >>> where >>> coalesce(fgpc.date_id,fgcd.date_id) between >>> 20131201 and 20140119 >>> group by >>> >>> coalesce(fgpc.date_id,fgcd.date_id),fgpc.m_ad_grp_pub_key,fgpc.m_kw_pub_key,kws.expr_names,kws.expr_values,kws.m_ad_grp_semid >>> ; >>> >>> This is the explain plan for the same. >>> explain analyze verbose select ...... >>> .... >>> >>> Data Node Scan on "__REMOTE_GROUP_QUERY__" (cost=0.00..2.50 rows=1000 >>> width=908) (actual time=1672.149..8281.918 rows=605575 loops=1) >>> Output: (COALESCE((fgpc.date_id)::bigint, fgcd.date_id)), >>> fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, kws.expr_names, kws.expr_values, >>> kws.m_ad_grp_semid, (sum(fgpc.m_imps)), (sum(fgpc.m_clicks)), >>> (sum(fgpc.m_cost)), (sum(fgpc.m_conv_1pc)), (sum(fgpc.m_conv_mpc)), >>> (avg(fgpc.m_cnv_rate_1pc)), (avg(fgpc.m_cnv_ra >>> te_mpc)), (avg(fgpc.m_avg_cpc)), (avg(fgpc.m_max_cpc)), >>> (avg(fgpc.m_firstpage_cpc)), (avg(fgpc.m_topofpage_cpc)), >>> (avg(fgpc.m_avg_cpm)), (avg(fgpc.m_max_cpm)), (avg(fgpc.m_max_cpa_pct)), >>> (avg(fgpc.m_avg_pos)), (avg(fgpc.m_lowest_pos)), (avg(fgpc.m_highest_pos)), >>> (avg(fgpc.m_quality_score)), (avg(fgpc.m_view_thr >>> u_conv)), (sum(fgcd.m_revenue)), (sum(fgcd.m_conversions)), >>> (sum(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))), >>> (max(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))), >>> (min(COALESCE(kws.m_new_kw_bid, kws.m_kw_bid))) >>> Node/s: d11, d12, d13, d14, d15, d16 >>> Remote query: SELECT COALESCE((l.a_1)::bigint, l.a_23), l.a_2, l.a_3, >>> r.a_1, r.a_2, r.a_3, sum(l.a_4), sum(l.a_5), sum(l.a_6), sum(l.a_7), >>> sum(l.a_8), pg_catalog.numeric_avg(avg(l.a_9)), >>> pg_catalog.numeric_avg(avg(l.a_10)), pg_catalog.numeric_avg(avg(l.a_11)), >>> pg_catalog.numeric_avg(avg(l.a_12)), pg_catalog. >>> numeric_avg(avg(l.a_13)), pg_catalog.numeric_avg(avg(l.a_14)), >>> pg_catalog.numeric_avg(avg(l.a_15)), pg_catalog.numeric_avg(avg(l.a_16)), >>> pg_catalog.numeric_avg(avg(l.a_17)), pg_catalog.numeric_avg(avg(l.a_18)), >>> pg_catalog.numeric_avg(avg(l.a_19)), pg_catalog.numeric_avg(avg(l.a_20)), >>> pg_catalog.numeric_avg(avg( >>> l.a_21)), pg_catalog.numeric_avg(avg(l.a_22)), sum(l.a_24), sum(l.a_25), >>> sum(COALESCE(r.a_4, r.a_5)), max(COALESCE(r.a_4, r.a_5)), >>> min(COALESCE(r.a_4, r.a_5)) FROM ((SELECT l.a_1, l.a_2, l.a_3, l.a_4, l.a_5, >>> l.a_6, l.a_7, l.a_8, l.a_9, l.a_10, l.a_11, l.a_12, l.a_13, l.a_14, l.a_15, >>> l.a_16, l.a_17, l.a_18, l.a_ >>> 19, l.a_20, l.a_21, l.a_22, r.a_1, r.a_2, r.a_3 FROM ((SELECT >>> fgpc.date_id, fgpc.m_ad_grp_pub_key, fgpc.m_kw_pub_key, fgpc.m_imps, >>> fgpc.m_clicks, fgpc.m_cost, fgpc.m_conv_1pc, fgpc.m_conv_mpc, >>> fgpc.m_cnv_rate_1pc, fgpc.m_cnv_rate_mpc, fgpc.m_avg_cpc, fgpc.m_max_cpc, >>> fgpc.m_firstpage_cpc, fgpc.m_topofpage_cpc, f >>> gpc.m_avg_cpm, fgpc.m_max_cpm, fgpc.m_max_cpa_pct, fgpc.m_avg_pos, >>> fgpc.m_lowest_pos, fgpc.m_highest_pos, fgpc.m_quality_score, >>> fgpc.m_view_thru_conv FROM ONLY bidw.fact_msn_kw_perf_daily fgpc WHERE true) >>> l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13, a_14, >>> a_15, a_16, a_17, a_18, a_19, >>> a_20, a_21, a_22) LEFT JOIN (SELECT fgcd.date_id, fgcd.m_revenue, >>> fgcd.m_conversions, fgcd.m_ad_grp_pub_key, fgcd.m_kw_pub_key FROM ONLY >>> bidw.fact_msn_kw_conversion_daily fgcd WHERE true) r(a_1, a_2, a_3, a_4, >>> a_5) ON (((l.a_1 = l.a_1) AND (l.a_2 = r.a_4) AND (l.a_3 = r.a_5)))) WHERE >>> ((COALESCE((l.a_1)::bigint, >>> r.a_1) >= 20131201) AND (COALESCE((l.a_1)::bigint, r.a_1) <= 20140119))) >>> l(a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10, a_11, a_12, a_13, a_14, >>> a_15, a_16, a_17, a_18, a_19, a_20, a_21, a_22, a_23, a_24, a_25) JOIN >>> (SELECT kws.expr_names, kws.expr_values, kws.m_ad_grp_semid, >>> kws.m_new_kw_bid, kws.m_kw_bi >>> d, kws.m_kw_pub_key, kws.m_ad_grp_pub_key FROM ONLY biods.msn_keyword_sup >>> kws WHERE true) r(a_1, a_2, a_3, a_4, a_5, a_6, a_7) ON (true)) WHERE >>> ((l.a_2 = r.a_7) AND (l.a_3 = r.a_6)) GROUP BY 1, 2, 3, 4, 5, 6 >>> Total runtime: 8378.080 ms >>> (5 rows) >>> >>> >>> >>> This is the actual time taken by the query: >>> >>> [postgres@sv4-pgxc-db04 test]$ time psql -d mydb -f "test.sql" > a.out >>> >>> real 0m23.533s >>> user 0m15.705s >>> sys 0m0.748s >>> >>> Now i dont know why is it taking that much time. >> >> >> Try adding LIMIT with different amounts for example to see how that >> impacts time. >> >> Also, try enabling statement logging (log_statement = all in >> postgresql.conf) on the data nodes to see how long it takes on each node. >> >> Also, the statement was rewritten in XC, with relations converted into >> SELECTs, so try running the rewritten version directly to see how long it >> takes. >> >> Thanks, >> >> Mason >> > |