加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

postgresql分区表笔记

发布时间:2020-12-13 17:48:01 所属栏目:百科 来源:网络整理
导读:最近在做oracle数据库向pg迁移的事情,非分区表使用ora2pg很容易迁移,遇到分区表的时候,发现由于oracle中的分区表分区表都采用的“P_日期”作为分区名,ora2pg导出的sql中pg子表名成为“p_日期”,导致大量的表名重复出错,只有在ora2pg到处的文件上进行手

最近在做oracle数据库向pg迁移的事情,非分区表使用ora2pg很容易迁移,遇到分区表的时候,发现由于oracle中的分区表分区表都采用的“P_日期”作为分区名,ora2pg导出的sql中pg子表名成为“p_日期”,导致大量的表名重复出错,只有在ora2pg到处的文件上进行手工修改,这里整理一下pg分区表的知识。


#####################################################################################################

分区表建立过程

######################################################################################################

第一步:创建表

CREATE TABLET_WEIBO_DAILY(PARTITION_DATE BIGINT,DATA VARCHAR);


第二部,通过继承创建子表:

CREATE TABLE T_WEIBO_DAILY_P20110701 ( CHECK (
PARTITION_DATE IN (20110701) --AND PARTITION_DATE IN (20110701) AND PARTITION_DATE IN (20110701) AND PARTITION_DATE IN (20110701)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110702 ( CHECK (
PARTITION_DATE IN (20110702) --AND PARTITION_DATE IN (20110702) AND PARTITION_DATE IN (20110702) AND PARTITION_DATE IN (20110702)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110808 ( CHECK (
PARTITION_DATE IN (20110808) --AND PARTITION_DATE IN (20110808) AND PARTITION_DATE IN (20110808) AND PARTITION_DATE IN (20110808)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110807 ( CHECK (
PARTITION_DATE IN (20110807) --AND PARTITION_DATE IN (20110807) AND PARTITION_DATE IN (20110807) AND PARTITION_DATE IN (20110807)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110806 ( CHECK (
PARTITION_DATE IN (20110806) --AND PARTITION_DATE IN (20110806) AND PARTITION_DATE IN (20110806) AND PARTITION_DATE IN (20110806)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110805 ( CHECK (
PARTITION_DATE IN (20110805) --AND PARTITION_DATE IN (20110805) AND PARTITION_DATE IN (20110805) AND PARTITION_DATE IN (20110805)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110804 ( CHECK (
PARTITION_DATE IN (20110804) --AND PARTITION_DATE IN (20110804) AND PARTITION_DATE IN (20110804) AND PARTITION_DATE IN (20110804)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110803 ( CHECK (
PARTITION_DATE IN (20110803) --AND PARTITION_DATE IN (20110803) AND PARTITION_DATE IN (20110803) AND PARTITION_DATE IN (20110803)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110802 ( CHECK (
PARTITION_DATE IN (20110802) --AND PARTITION_DATE IN (20110802) AND PARTITION_DATE IN (20110802) AND PARTITION_DATE IN (20110802)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110809 ( CHECK (
PARTITION_DATE IN (20110809) --AND PARTITION_DATE IN (20110809) AND PARTITION_DATE IN (20110809) AND PARTITION_DATE IN (20110809)
) ) INHERITS (T_WEIBO_DAILY);
CREATE TABLE T_WEIBO_DAILY_P20110810 ( CHECK (
PARTITION_DATE IN (20110810) --AND PARTITION_DATE IN (20110810) AND PARTITION_DATE IN (20110810) AND PARTITION_DATE IN (20110810)
) ) INHERITS (T_WEIBO_DAILY);


第三部:子表上建立索引:

-- Create indexes on each partition table
CREATE INDEX T_WEIBO_DAILY_P20110701_PARTITION_DATE ON T_WEIBO_DAILY_P20110701 (PARTITION_DATE);
--CREATE INDEX P_20110701_PARTITION_DATE ON P_20110701 (PARTITION_DATE);
--CREATE INDEX P_20110701_PARTITION_DATE ON P_20110701 (PARTITION_DATE);
--CREATE INDEX P_20110701_PARTITION_DATE ON P_20110701 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110702_PARTITION_DATE ON T_WEIBO_DAILY_P20110702 (PARTITION_DATE);
--CREATE INDEX P_20110702_PARTITION_DATE ON P_20110702 (PARTITION_DATE);
--CREATE INDEX P_20110702_PARTITION_DATE ON P_20110702 (PARTITION_DATE);
--CREATE INDEX P_20110702_PARTITION_DATE ON P_20110702 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110808_PARTITION_DATE ON T_WEIBO_DAILY_P20110808 (PARTITION_DATE);
--CREATE INDEX P_20110808_PARTITION_DATE ON P_20110808 (PARTITION_DATE);
--CREATE INDEX P_20110808_PARTITION_DATE ON P_20110808 (PARTITION_DATE);
--CREATE INDEX P_20110808_PARTITION_DATE ON P_20110808 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110807_PARTITION_DATE ON T_WEIBO_DAILY_P20110807 (PARTITION_DATE);
--CREATE INDEX P_20110807_PARTITION_DATE ON P_20110807 (PARTITION_DATE);
--CREATE INDEX P_20110807_PARTITION_DATE ON P_20110807 (PARTITION_DATE);
--CREATE INDEX P_20110807_PARTITION_DATE ON P_20110807 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110806_PARTITION_DATE ON T_WEIBO_DAILY_P20110806 (PARTITION_DATE);
--CREATE INDEX P_20110806_PARTITION_DATE ON P_20110806 (PARTITION_DATE);
--CREATE INDEX P_20110806_PARTITION_DATE ON P_20110806 (PARTITION_DATE);
--CREATE INDEX P_20110806_PARTITION_DATE ON P_20110806 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110805_PARTITION_DATE ON T_WEIBO_DAILY_P20110805 (PARTITION_DATE);
--CREATE INDEX P_20110805_PARTITION_DATE ON P_20110805 (PARTITION_DATE);
--CREATE INDEX P_20110805_PARTITION_DATE ON P_20110805 (PARTITION_DATE);
--CREATE INDEX P_20110805_PARTITION_DATE ON P_20110805 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110804_PARTITION_DATE ON T_WEIBO_DAILY_P20110804 (PARTITION_DATE);
--CREATE INDEX P_20110804_PARTITION_DATE ON P_20110804 (PARTITION_DATE);
--CREATE INDEX P_20110804_PARTITION_DATE ON P_20110804 (PARTITION_DATE);
--CREATE INDEX P_20110804_PARTITION_DATE ON P_20110804 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110803_PARTITION_DATE ON T_WEIBO_DAILY_P20110803 (PARTITION_DATE);
--CREATE INDEX P_20110803_PARTITION_DATE ON P_20110803 (PARTITION_DATE);
--CREATE INDEX P_20110803_PARTITION_DATE ON P_20110803 (PARTITION_DATE);
--CREATE INDEX P_20110803_PARTITION_DATE ON P_20110803 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110802_PARTITION_DATE ON T_WEIBO_DAILY_P20110802 (PARTITION_DATE);
--CREATE INDEX P_20110802_PARTITION_DATE ON P_20110802 (PARTITION_DATE);
--CREATE INDEX P_20110802_PARTITION_DATE ON P_20110802 (PARTITION_DATE);
--CREATE INDEX P_20110802_PARTITION_DATE ON P_20110802 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110809_PARTITION_DATE ON T_WEIBO_DAILY_P20110809 (PARTITION_DATE);
--CREATE INDEX P_20110809_PARTITION_DATE ON P_20110809 (PARTITION_DATE);
--CREATE INDEX P_20110809_PARTITION_DATE ON P_20110809 (PARTITION_DATE);
--CREATE INDEX P_20110809_PARTITION_DATE ON P_20110809 (PARTITION_DATE);
CREATE INDEX T_WEIBO_DAILY_P20110810_PARTITION_DATE ON T_WEIBO_DAILY_P20110810 (PARTITION_DATE);
--CREATE INDEX P_20110810_PARTITION_DATE ON P_20110810 (PARTITION_DATE);
--CREATE INDEX P_20110810_PARTITION_DATE ON P_20110810 (PARTITION_DATE);
--CREATE INDEX P_20110810_PARTITION_DATE ON P_20110810 (PARTITION_DATE);




第四部:创建触发函数:

CREATE OR REPLACE FUNCTION T_WEIBO_DAILY_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN
IF ( NEW.PARTITION_DATE IN (20110701) ) THEN INSERT INTO T_WEIBO_DAILY_P20110701 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110702) ) THEN INSERT INTO T_WEIBO_DAILY_P20110702 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110808) ) THEN INSERT INTO T_WEIBO_DAILY_P20110808 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110807) ) THEN INSERT INTO T_WEIBO_DAILY_P20110807 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110806) ) THEN INSERT INTO T_WEIBO_DAILY_P20110806 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110805) ) THEN INSERT INTO T_WEIBO_DAILY_P20110805 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110804) ) THEN INSERT INTO T_WEIBO_DAILY_P20110804 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110803) ) THEN INSERT INTO T_WEIBO_DAILY_P20110803 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110802) ) THEN INSERT INTO T_WEIBO_DAILY_P20110802 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110809) ) THEN INSERT INTO T_WEIBO_DAILY_P20110809 VALUES (NEW.*);
ELSIF ( NEW.PARTITION_DATE IN (20110810) ) THEN INSERT INTO T_WEIBO_DAILY_P20110810 VALUES (NEW.*);


ELSE
--INSERT INTO T_WEIBO_DAILY VALUES (NEW.*);


-- Or if you prefer raising an exception
RAISE EXCEPTION 'Value out of range. Fix the T_WEIBO_DAILY_insert_trigger() function!';
END IF;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;


第五步:建立触发器

CREATE TRIGGER insert_T_WEIBO_DAILY_trigger
BEFORE INSERT ON T_WEIBO_DAILY
FOR EACH ROW EXECUTE PROCEDURE T_WEIBO_DAILY_insert_trigger();


##################################################################################################

分区表的分区剪枝

###################################################################################################

版本信息和constraint_exclusion如下:

ta=#SELECTVERSION();
version
----------------------------------------------------------------------------------------------------------------------
PostgreSQL9.1.2onx86_64-unknown-linux-gnu,compiledbygcc(GCC)4.1.220070115(prerelease)(SUSELinux),64-bit
(1row)

ta=#showconstraint_exclusion;
constraint_exclusion
----------------------
partition
(1row)

看了一下postgresql.conf,constraint_exclusion 默认为paritition,文档中对这个参数描述如下:

constraint_exclusion (enum)
Controls the query planner iss use of table constraints to optimize queries. The allowed
values of constraint_exclusion are on (examine constraints for all tables),off (never examine
constraints),and partition (examine constraints only for inheritance child tables and UNION ALL
subqueries). partition is the default setting.

When this parameter allows it for a particular table,the planner compares query conditions
with the table is CHECK constraints,and omits scanning tables for which the conditions contradict the constraints.


也就是说constraint_exclusion 有三个选项(on,off,partition)作用如下:

on :对所有的表都会进行约束检查

off:对所有表都不进行约束检查

parition:只对继承的子表和UNION ALL的子查询进行约束检查


根据说明:没有必要对所有表在生成查询计划时都进行约束检查,对分区表需要分区剪枝,所以partition比较合适。


另:测试发现分区剪枝只发生在where条件后面,使用select * from table parition(**)这种写法分区剪枝不起作用!

ta=# explain select * from ta.T_WEIBO_DAILY where partition_date=20110701;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Result (cost=0.00..7.50 rows=3 width=236)
-> Append (cost=0.00..7.50 rows=3 width=236)
-> Seq Scan on t_weibo_daily (cost=0.00..0.00 rows=1 width=236)
Filter: (partition_date = 20110701)
-> Bitmap Heap Scan on t_weibo_daily_p20110701 t_weibo_daily (cost=3.27..7.50 rows=2 width=236)
Recheck Cond: (partition_date = 20110701)
-> Bitmap Index Scan on t_weibo_daily_p20110701_partition_date (cost=0.00..3.27 rows=2 width=0)
Index Cond: (partition_date = 20110701)
(8 rows)

ta=# explainselect*fromta.T_WEIBO_DAILYpartition(t_weibo_daily_p20110701);
QUERYPLAN
--------------------------------------------------------------------------------------------------
Result(cost=0.00..143.00rows=3301width=236)
->Append(cost=0.00..143.00rows=3301width=236)
->SeqScanont_weibo_dailypartition(cost=0.00..0.00rows=1width=236)
->SeqScanont_weibo_daily_p20110701partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110702partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110808partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110807partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110806partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110805partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110804partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110803partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110802partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110809partition(cost=0.00..13.00rows=300width=236)
->SeqScanont_weibo_daily_p20110810partition(cost=0.00..13.00rows=300width=236)
(14rows)

不清楚是不是我的第二个写法有问题??


##########################################################################

遗留的需要思考和解决的问题

1. pg分区这么复杂,怎样包装能让用户一个sql或者一个命令完成分区的增减。

2. 怎样让用户能简单的创建一个分区表,(或许可以参考greenplum)

3. 分区剪枝使用parition方式访问问题的问题,要么就报错,要么就要正确找到分区,在海量数据中没有分区剪枝是个灾难。

##########################################################################

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读