postgresql – 慢速嵌套循环左连接,索引扫描循环130k次
我真的很难优化这个查询:
SELECT wins / (wins + COUNT(loosers.match_id) + 0.) winrate,wins + COUNT(loosers.match_id) matches,winners.winning_champion_one_id,winners.winning_champion_two_id,winners.winning_champion_three_id,winners.winning_champion_four_id,winners.winning_champion_five_id FROM ( SELECT COUNT(match_id) wins,winning_champion_one_id,winning_champion_two_id,winning_champion_three_id,winning_champion_four_id,winning_champion_five_id FROM matches WHERE 157 IN (winning_champion_one_id,winning_champion_five_id) GROUP BY winning_champion_one_id,winning_champion_five_id ) winners LEFT OUTER JOIN matches loosers ON winners.winning_champion_one_id = loosers.loosing_champion_one_id AND winners.winning_champion_two_id = loosers.loosing_champion_two_id AND winners.winning_champion_three_id = loosers.loosing_champion_three_id AND winners.winning_champion_four_id = loosers.loosing_champion_four_id AND winners.winning_champion_five_id = loosers.loosing_champion_five_id GROUP BY winners.wins,winners.winning_champion_five_id HAVING wins + COUNT(loosers.match_id) >= 20 ORDER BY winrate DESC,matches DESC LIMIT 1; 这是EXPLAIN(BUFFERS,ANALYZE)的输出: Limit (cost=72808.80..72808.80 rows=1 width=58) (actual time=1478.749..1478.749 rows=1 loops=1) Buffers: shared hit=457002 -> Sort (cost=72808.80..72837.64 rows=11535 width=58) (actual time=1478.747..1478.747 rows=1 loops=1) " Sort Key: ((((count(matches.match_id)))::numeric / ((((count(matches.match_id)) + count(loosers.match_id)))::numeric + '0'::numeric))) DESC,(((count(matches.match_id)) + count(loosers.match_id))) DESC" Sort Method: top-N heapsort Memory: 25kB Buffers: shared hit=457002 -> HashAggregate (cost=72462.75..72751.12 rows=11535 width=58) (actual time=1448.941..1478.643 rows=83 loops=1) " Group Key: (count(matches.match_id)),matches.winning_champion_one_id,matches.winning_champion_two_id,matches.winning_champion_three_id,matches.winning_champion_four_id,matches.winning_champion_five_id" Filter: (((count(matches.match_id)) + count(loosers.match_id)) >= 20) Rows Removed by Filter: 129131 Buffers: shared hit=457002 -> Nested Loop Left Join (cost=9857.76..69867.33 rows=115352 width=26) (actual time=288.086..1309.687 rows=146610 loops=1) Buffers: shared hit=457002 -> HashAggregate (cost=9857.33..11010.85 rows=115352 width=18) (actual time=288.056..408.317 rows=129214 loops=1) " Group Key: matches.winning_champion_one_id,matches.winning_champion_five_id" Buffers: shared hit=22174 -> Bitmap Heap Scan on matches (cost=1533.34..7455.69 rows=160109 width=18) (actual time=26.618..132.844 rows=161094 loops=1) Recheck Cond: ((157 = winning_champion_one_id) OR (157 = winning_champion_two_id) OR (157 = winning_champion_three_id) OR (157 = winning_champion_four_id) OR (157 = winning_champion_five_id)) Heap Blocks: exact=21594 Buffers: shared hit=22174 -> BitmapOr (cost=1533.34..1533.34 rows=164260 width=0) (actual time=22.190..22.190 rows=0 loops=1) Buffers: shared hit=580 -> Bitmap Index Scan on matches_winning_champion_one_id_index (cost=0.00..35.03 rows=4267 width=0) (actual time=0.045..0.045 rows=117 loops=1) Index Cond: (157 = winning_champion_one_id) Buffers: shared hit=3 -> Bitmap Index Scan on matches_winning_champion_two_id_index (cost=0.00..47.22 rows=5772 width=0) (actual time=0.665..0.665 rows=3010 loops=1) Index Cond: (157 = winning_champion_two_id) Buffers: shared hit=13 -> Bitmap Index Scan on matches_winning_champion_three_id_index (cost=0.00..185.53 rows=22840 width=0) (actual time=3.824..3.824 rows=23893 loops=1) Index Cond: (157 = winning_champion_three_id) Buffers: shared hit=89 -> Bitmap Index Scan on matches_winning_champion_four_id_index (cost=0.00..537.26 rows=66257 width=0) (actual time=8.069..8.069 rows=67255 loops=1) Index Cond: (157 = winning_champion_four_id) Buffers: shared hit=244 -> Bitmap Index Scan on matches_winning_champion_five_id_index (cost=0.00..528.17 rows=65125 width=0) (actual time=9.577..9.577 rows=67202 loops=1) Index Cond: (157 = winning_champion_five_id) Buffers: shared hit=231 -> Index Scan using matches_loosing_champion_ids_index on matches loosers (cost=0.43..0.49 rows=1 width=18) (actual time=0.006..0.006 rows=0 loops=129214) Index Cond: ((matches.winning_champion_one_id = loosing_champion_one_id) AND (matches.winning_champion_two_id = loosing_champion_two_id) AND (matches.winning_champion_three_id = loosing_champion_three_id) AND (matches.winning_champion_four_id = loosing_champion_four_id) AND (matches.winning_champion_five_id = loosing_champion_five_id)) Buffers: shared hit=434828 Planning time: 0.584 ms Execution time: 1479.779 ms 表和索引定义: create table matches ( match_id bigint not null,winning_champion_one_id smallint,winning_champion_two_id smallint,winning_champion_three_id smallint,winning_champion_four_id smallint,winning_champion_five_id smallint,loosing_champion_one_id smallint,loosing_champion_two_id smallint,loosing_champion_three_id smallint,loosing_champion_four_id smallint,loosing_champion_five_id smallint,constraint matches_match_id_pk primary key (match_id) ); create index matches_winning_champion_one_id_index on matches (winning_champion_one_id); create index matches_winning_champion_two_id_index on matches (winning_champion_two_id); create index matches_winning_champion_three_id_index on matches (winning_champion_three_id); create index matches_winning_champion_four_id_index on matches (winning_champion_four_id); create index matches_winning_champion_five_id_index on matches (winning_champion_five_id); create index matches_loosing_champion_ids_index on matches (loosing_champion_one_id,loosing_champion_two_id,loosing_champion_three_id,loosing_champion_four_id,loosing_champion_five_id); create index matches_loosing_champion_one_id_index on matches (loosing_champion_one_id); create index matches_loosing_champion_two_id_index on matches (loosing_champion_two_id); create index matches_loosing_champion_three_id_index on matches (loosing_champion_three_id); create index matches_loosing_champion_four_id_index on matches (loosing_champion_four_id); create index matches_loosing_champion_five_id_index on matches (loosing_champion_five_id); 该表可以有100米的行.目前它确实有大约20米的行. public.matches,2331648 rows,197 MB public.matches_riot_match_id_pk,153 MB public.matches_loosing_champion_ids_index,136 MB public.matches_loosing_champion_four_id_index,113 MB public.matches_loosing_champion_five_id_index,113 MB public.matches_winning_champion_one_id_index,113 MB public.matches_winning_champion_five_id_index,113 MB public.matches_winning_champion_three_id_index,112 MB public.matches_loosing_champion_three_id_index,112 MB public.matches_winning_champion_four_id_index,112 MB public.matches_loosing_champion_one_id_index,112 MB public.matches_winning_champion_two_id_index,112 MB public.matches_loosing_champion_two_id_index,112 MB 这些是我对postgresql.conf所做的唯一更改: max_connections = 50 shared_buffers = 6GB effective_cache_size = 18GB work_mem = 125829kB maintenance_work_mem = 1536MB min_wal_size = 1GB max_wal_size = 2GB checkpoint_completion_target = 0.7 wal_buffers = 16MB default_statistics_target = 100 max_parallel_workers_per_gather = 8 min_parallel_relation_size = 1 可能有些东西我忽略了. 编辑: 对于任何想知道的人.最好的方法是UNION ALL方法.不幸的是,建议的Erwin模式效果不佳.这是建议模式的EXPLAIN(ANALYZE,BUFFERS)输出: Limit (cost=2352157.06..2352157.06 rows=1 width=48) (actual time=1976.709..1976.710 rows=1 loops=1) Buffers: shared hit=653004 -> Sort (cost=2352157.06..2352977.77 rows=328287 width=48) (actual time=1976.708..1976.708 rows=1 loops=1) " Sort Key: (((((count(*)))::numeric * 1.0) / (((count(*)) + l.loss))::numeric)) DESC,(((count(*)) + l.loss)) DESC" Sort Method: top-N heapsort Memory: 25kB Buffers: shared hit=653004 -> Nested Loop (cost=2.10..2350515.62 rows=328287 width=48) (actual time=0.553..1976.294 rows=145 loops=1) Buffers: shared hit=653004 -> GroupAggregate (cost=1.67..107492.42 rows=492431 width=16) (actual time=0.084..1409.450 rows=154547 loops=1) Group Key: w.winner Buffers: shared hit=188208 -> Merge Join (cost=1.67..100105.96 rows=492431 width=8) (actual time=0.061..1301.578 rows=199530 loops=1) Merge Cond: (tm.team_id = w.winner) Buffers: shared hit=188208 -> Index Only Scan using team_member_champion_team_idx on team_member tm (cost=0.56..8978.79 rows=272813 width=8) (actual time=0.026..103.842 rows=265201 loops=1) Index Cond: (champion_id = 157) Heap Fetches: 0 Buffers: shared hit=176867 -> Index Only Scan using match_winner_loser_idx on match w (cost=0.43..79893.82 rows=2288093 width=8) (actual time=0.013..597.331 rows=2288065 loops=1) Heap Fetches: 0 Buffers: shared hit=11341 -> Subquery Scan on l (cost=0.43..4.52 rows=1 width=8) (actual time=0.003..0.003 rows=0 loops=154547) Filter: (((count(*)) + l.loss) > 19) Rows Removed by Filter: 0 Buffers: shared hit=464796 -> GroupAggregate (cost=0.43..4.49 rows=2 width=16) (actual time=0.003..0.003 rows=0 loops=154547) Group Key: l_1.loser Buffers: shared hit=464796 -> Index Only Scan using match_loser_winner_idx on match l_1 (cost=0.43..4.46 rows=2 width=8) (actual time=0.002..0.002 rows=0 loops=154547) Index Cond: (loser = w.winner) Heap Fetches: 0 Buffers: shared hit=464796 Planning time: 0.634 ms Execution time: 1976.792 ms 现在使用UNION ALL方法和新架构: Limit (cost=275211.80..275211.80 rows=1 width=48) (actual time=3540.420..3540.421 rows=1 loops=1) Buffers: shared hit=199478 CTE t -> Index Only Scan using team_member_champion_team_idx on team_member (cost=0.56..8978.79 rows=272813 width=8) (actual time=0.027..103.732 rows=265201 loops=1) Index Cond: (champion_id = 157) Heap Fetches: 0 Buffers: shared hit=176867 -> Sort (cost=266233.01..266233.51 rows=200 width=48) (actual time=3540.417..3540.417 rows=1 loops=1) " Sort Key: ((((count((true)))::numeric * 1.0) / (count(*))::numeric)) DESC,(count(*)) DESC" Sort Method: top-N heapsort Memory: 25kB Buffers: shared hit=199478 -> HashAggregate (cost=266228.01..266232.01 rows=200 width=48) (actual time=3455.112..3540.301 rows=145 loops=1) Group Key: t.team_id Filter: (count(*) > 19) Rows Removed by Filter: 265056 Buffers: shared hit=199478 -> Append (cost=30088.37..254525.34 rows=936214 width=9) (actual time=315.399..3137.115 rows=386575 loops=1) Buffers: shared hit=199478 -> Merge Join (cost=30088.37..123088.80 rows=492454 width=9) (actual time=315.398..1583.746 rows=199530 loops=1) Merge Cond: (match.winner = t.team_id) Buffers: shared hit=188208 -> Index Only Scan using match_winner_loser_idx on match (cost=0.43..79893.82 rows=2288093 width=8) (actual time=0.033..583.016 rows=2288065 loops=1) Heap Fetches: 0 Buffers: shared hit=11341 -> Sort (cost=30087.94..30769.97 rows=272813 width=8) (actual time=315.333..402.516 rows=310184 loops=1) Sort Key: t.team_id Sort Method: quicksort Memory: 24720kB Buffers: shared hit=176867 -> CTE Scan on t (cost=0.00..5456.26 rows=272813 width=8) (actual time=0.030..240.150 rows=265201 loops=1) Buffers: shared hit=176867 -> Merge Join (cost=30088.37..122074.39 rows=443760 width=9) (actual time=134.118..1410.484 rows=187045 loops=1) Merge Cond: (match_1.loser = t_1.team_id) Buffers: shared hit=11270 -> Index Only Scan using match_loser_winner_idx on match match_1 (cost=0.43..79609.82 rows=2288093 width=8) (actual time=0.025..589.773 rows=2288060 loops=1) Heap Fetches: 0 Buffers: shared hit=11270 -> Sort (cost=30087.94..30769.97 rows=272813 width=8) (actual time=134.076..219.529 rows=303364 loops=1) Sort Key: t_1.team_id Sort Method: quicksort Memory: 24720kB -> CTE Scan on t t_1 (cost=0.00..5456.26 rows=272813 width=8) (actual time=0.003..60.179 rows=265201 loops=1) Planning time: 0.401 ms Execution time: 3548.072 ms 解决方法
您的查询和解释输出看起来不是那么糟糕.不过,有几点意见:
> match_loosing_champion_ids_index上的index-only scan而不是索引扫描会更快.你没有看到的原因:无用的计数(match_id). 使用您的架构 查询1:LEFT JOIN LATERAL 这样,我们在加入之前聚合,不需要另一个GROUP BY.连接操作也更少.而count(*)应该取消阻止仅索引扫描: SELECT player1,player2,player3,player4,player5,((win * 1.0) / (win + loss))::numeric(5,5) AS winrate,win + loss AS matches FROM ( SELECT winning_champion_one_id AS player1,winning_champion_two_id AS player2,winning_champion_three_id AS player3,winning_champion_four_id AS player4,winning_champion_five_id AS player5,COUNT(*) AS win -- see below FROM matches WHERE 157 IN (winning_champion_one_id,winning_champion_five_id) GROUP BY 1,2,3,4,5 ) w LEFT JOIN LATERAL ( SELECT COUNT(*) AS loss -- see below FROM matches WHERE loosing_champion_one_id = w.player1 AND loosing_champion_two_id = w.player2 AND loosing_champion_three_id = w.player3 AND loosing_champion_four_id = w.player4 AND loosing_champion_five_id = w.player5 GROUP BY loosing_champion_one_id,loosing_champion_five_id ) l ON true WHERE win + loss > 19 ORDER BY winrate DESC,matches DESC LIMIT 1; 计数(*): 删除对match_id的唯一引用允许在matches_loosing_champion_ids_index上进行仅索引扫描!必须满足一些其他先决条件…… 查询2:UNION ALL 围绕昂贵的嵌套循环左连接和单个GROUP BY的另一种方法.但是我们再添加5个位图索引扫描.可能会更快或更快: SELECT player1,(count(win) * 1.0) / count(*) AS winrate -- I would round ...,count(*) AS matches FROM ( SELECT winning_champion_one_id AS player1,TRUE AS win FROM matches WHERE 157 IN (winning_champion_one_id,winning_champion_five_id) UNION ALL SELECT loosing_champion_one_id,loosing_champion_five_id,NULL AS win -- following "count(win)" ignores NULL values FROM matches WHERE 157 IN (loosing_champion_one_id,loosing_champion_five_id) ) m GROUP BY 1,5 HAVING count(*) > 19 -- min 20 matches -- AND count(win) > 0 -- min 1 win -- see below! ORDER BY winrate DESC,matches DESC LIMIT 1; AND count(win)> 0被注释掉了,因为它是多余的,而你无论如何都选择了最好的单一胜利. 不同的架构 我真的会使用不同的架构开始: CREATE TABLE team ( team_id serial PRIMARY KEY -- or bigserial if you expect > 2^31 distinct teams -- more attributes? ); CREATE TABLE player ( player_id smallserial PRIMARY KEY -- more attributes? ); CREATE TABLE team_member ( team_id integer REFERENCES team,player_id smallint REFERENCES player,team_pos smallint NOT NULL CHECK (team_pos BETWEEN 1 AND 5) -- only if position matters,PRIMARY KEY (team_id,player_id),UNIQUE (team_id,team_pos) ); CREATE INDEX team_member_player_team_idx on team_member (player_id,team_id); -- Enforce 5 players per team. Various options,different question. CREATE TABLE match ( match_id bigserial PRIMARY KEY,winner integer NOT NULL REFERENCES team,loser integer NOT NULL REFERENCES team,CHECK (winner <> loser) -- wouldn't make sense ); CREATE INDEX match_winner_loser_idx ON match (winner,loser); CREATE INDEX match_loser_winner_idx ON match (loser,winner); 辅助表添加到磁盘占用空间,但主表更小.最重要的是,您需要更少的索引,对于您的基数,这些索引应该要小得多. 询问 对于此等效查询,我们不需要任何其他索引.更简单,现在可能更快: SELECT winner,win * 1.0/ (win + loss) AS winrate,win + loss AS matches FROM ( SELECT w.winner,count(*) AS win FROM team_member tm JOIN match w ON w.winner = tm.team_id WHERE tm.player_id = 157 GROUP BY w.winner ) w LEFT JOIN LATERAL ( SELECT count(*) AS loss FROM match l WHERE l.loser = w.winner GROUP BY l.loser ) l ON true WHERE win + loss > 19 ORDER BY winrate DESC,matches DESC LIMIT 1; 将结果加入team_member以获得个人玩家. 您也可以尝试上面相应的UNION ALL技术: WITH t AS ( SELECT team_id FROM team_member WHERE player_id = 157 -- provide player here ) SELECT team_id,(count(win) * 1.0) / count(*) AS winrate,count(*) AS matches FROM ( SELECT t.team_id,TRUE AS win FROM t JOIN match ON winner = t.team_id UNION ALL SELECT t.team_id,NULL AS win FROM t JOIN match ON loser = t.team_id ) m GROUP BY 1 HAVING count(*) > 19 -- min 20 matches ORDER BY winrate DESC,matches DESC LIMIT 1; 绽放指数 我简要地为你的谓词考虑了bloom index: WHERE 157 IN (loosing_champion_one_id,loosing_champion_five_id) 没有测试,可能不会只支付5个smallint列. (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |