|
From: Nick M. <nm...@gm...> - 2012-08-22 20:59:34
|
Sorry, yes i forgot to including my indexes, they are as follows:
// parent indexes
CREATE INDEX parent_list_id_index ON parent(list_id);
CREATE INDEX parent_time_index ON parent(time);
// list indexes
CREATE INDEX list_list_id_index ON list(list_id);
CREATE INDEX list_sub_list_id_index ON list(sub_list_id);
// sub list indexes
CREATE INDEX sub_list_sub_list_id_index ON sub_list(sub_list_id);
*EXPLAIN ANALYZE from regular Postgres (8.4):*
test_db=# EXPLAIN ANALYZE
SELECT sub_list.*
FROM sub_list
JOIN list AS listquery
ON listquery.sub_list_id = sub_list.sub_list_id
JOIN parent AS parentquery
ON parentquery.list_id = listquery.list_id
WHERE parentquery.time > 20000 AND
parentquery.time < 30000;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------
Nested Loop (cost=596.23..102441.82 rows=29973 width=253) (actual
time=25.015..488.914 rows=39996 loops=1)
-> Hash Join (cost=596.23..51970.75 rows=20602 width=8) (actual
time=25.002..446.462 rows=19998 loops=1)
Hash Cond: (listquery.list_id = parentquery.list_id)
-> Seq Scan on list listquery (cost=0.00..35067.80 rows=1840080
width=16) (actual time=0.055..160.550 rows=2000000 loops=1)
-> Hash (cost=456.28..456.28 rows=11196 width=8) (actual
time=14.105..14.105 rows=9999 loops=1)
-> Index Scan using parent_time_index on parent parentquery
(cost=0.00..456.28 rows=11196 width=8) (actual time=0.061..8.450 rows=9999
l
oops=1)
Index Cond: ((time > 20000) AND (time < 30000))
-> Index Scan using sub_list_sub_list_id_index on sub_list
(cost=0.00..2.42 rows=2 width=253) (actual time=0.001..0.
002 rows=2 loops=19998)
Index Cond: (sub_list.sub_list_id = listquery.sub_list_id)
Total runtime: 491.447 ms
*///////////////////////////////////////*
*
*
test_db=# EXPLAIN ANALYZE
SELECT sub_list.*
FROM sub_list,
(SELECT list.sub_list_id
FROM list,
(SELECT list_id
FROM parent
WHERE time > 20000 AND
time < 30000
) AS parentquery
WHERE list.list_id = parentquery.list_id
) AS listquery
WHERE sub_list.sub_list_id = listquery.sub_list_id;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------
Nested Loop (cost=596.23..102441.82 rows=29973 width=253) (actual
time=25.493..494.204 rows=39996 loops=1)
-> Hash Join (cost=596.23..51970.75 rows=20602 width=8) (actual
time=25.479..452.275 rows=19998 loops=1)
Hash Cond: (list.list_id = parent.list_id)
-> Seq Scan on list (cost=0.00..35067.80 rows=1840080 width=16)
(actual time=0.051..161.849 rows=2000000 loops=1)
-> Hash (cost=456.28..456.28 rows=11196 width=8) (actual
time=15.444..15.444 rows=9999 loops=1)
-> Index Scan using parent_time_index on parent
(cost=0.00..456.28 rows=11196 width=8) (actual time=0.046..9.568 rows=9999
loops=1)
Index Cond: ((time > 20000) AND (time < 30000))
-> Index Scan using sub_list_sub_list_id_index on sub_list
(cost=0.00..2.42 rows=2 width=253) (actual time=0.001..0.
002 rows=2 loops=19998)
Index Cond: (sub_list.sub_list_id = list.sub_list_id)
Total runtime: 496.729 ms
*///////////////////////////////////////*
test_db=# EXPLAIN ANALYZE
SELECT sub_list.*
FROM sub_list
WHERE sub_list_id IN (SELECT sub_list_id
FROM list
WHERE list_id IN (SELECT list_id
FROM parent
WHERE time > 20000 AND
time < 30000
)
);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----
Hash Semi Join (cost=42074.51..123747.06 rows=47078 width=253) (actual
time=460.406..1376.375 rows=39996 loops=1)
Hash Cond: (sub_list.sub_list_id = list.sub_list_id)
-> Seq Scan on sub_list (cost=0.00..72183.31 rows=2677131 width=253)
(actual time=0.080..337.862 rows=4000000 loops=1)
-> Hash (cost=41781.08..41781.08 rows=23475 width=8) (actual
time=439.640..439.640 rows=19998 loops=1)
-> Hash Semi Join (cost=596.23..41781.08 rows=23475 width=8)
(actual time=19.608..436.503 rows=19998 loops=1)
Hash Cond: (list.list_id = parent.list_id)
-> Seq Scan on list (cost=0.00..35067.80 rows=1840080
width=16) (actual time=0.022..159.590 rows=2000000 loops=1)
-> Hash (cost=456.28..456.28 rows=11196 width=8) (actual
time=9.989..9.989 rows=9999 loops=1)
-> Index Scan using parent_time_index on parent
(cost=0.00..456.28 rows=11196 width=8) (actual time=0.046..6.125 rows=9999
loops
=1)
Index Cond: ((base_time > 20000) AND (base_time
< 30000))
Total runtime: 1378.711 ms
Regards,
Nick
On Wed, Aug 22, 2012 at 4:53 PM, Mason Sharp <ma...@st...> wrote:
> On Wed, Aug 22, 2012 at 3:21 PM, Nick Maludy <nm...@gm...> wrote:
> > All,
> >
> > I am trying to run the following query which never finishes running:
> >
> > SELECT sub_list.*
> > FROM sub_list
> > JOIN list AS listquery
> > ON listquery.sub_list_id = sub_list.sub_list_id
> > JOIN parent AS parentquery
> > ON parentquery.list_id = listquery.list_id
> > WHERE parentquery.time > 20000 AND
> > parentquery.time < 30000;
> >
> > Please excuse my dumb table and column names this is just an example with
> > the same structure as a real query of mine.
> >
> > CREATE TABLE parent (
> > name text,
> > time bigint,
> > list_id bigserial
> > );
> >
> > CREATE TABLE list (
> > list_id bigint,
> > name text,
> > time bigint,
> > sub_list_id bigserial
> > );
> >
> > CREATE TABLE sub_list (
> > sub_list_id bigint,
> > element_name text,
> > element_value numeric,
> > time bigint
> > );
> >
> > I have tried rewriting the same query in the following ways, none of them
> > finish running:
> >
> > SELECT sub_list.*
> > FROM sub_list,
> > (SELECT list.sub_list_id
> > FROM list,
> > (SELECT list_id
> > FROM parent
> > WHERE time > 20000 AND
> > time < 30000
> > ) AS parentquery
> > WHERE list.list_id = parentquery.list_id
> > ) AS listquery
> > WHERE sub_list.sub_list_id = listquery.sub_list_id;
> >
> > SELECT sub_list.*
> > FROM sub_list
> > WHERE sub_list_id IN (SELECT sub_list_id
> > FROM list
> > WHERE list_id IN (SELECT list_id
> > FROM parent
> > WHERE time > 20000 AND
> > time < 30000);
> >
> > By "not finishing running" i mean that i run them in psql and they hang
> > there, i notice that my datanode processes (postgres process name) are
> > pegged at 100% CPU usage, but iotop doesn't show any disk activity. When
> i
> > try to run EXPLAIN ANALYZE i get the same results and have to Ctrl-C out.
> >
> > My parent table has 1 million rows, my list table has 10 million rows,
> and
> > my sub_list_table has 100 million rows.
> >
> > I am able to run all of these queries just fine on regular Postgres with
> the
> > same amount of data in the tables.
> >
> > Any help is greatly appreciated,
>
> Please add DISTRIBUTE BY HASH(list_id) as a clause at the end of your
> CREATE TABLE statement. Otherwise what it is doing is pulling up a lot
> of data to the coordinator for joining.
>
> Also, let us know what indexes you have created.
>
> > Nick
> >
> >
> ------------------------------------------------------------------------------
> > Live Security Virtual Conference
> > Exclusive live event will cover all the ways today's security and
> > threat landscape has changed and how IT managers can respond. Discussions
> > will include endpoint security, mobile security and the latest in malware
> > threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
> > _______________________________________________
> > Postgres-xc-general mailing list
> > Pos...@li...
> > https://lists.sourceforge.net/lists/listinfo/postgres-xc-general
> >
>
>
>
> --
> Mason Sharp
>
> StormDB - http://www.stormdb.com
> The Database Cloud
>
|