KingbaseES数据库分区表添加主键与索引的建议

发布时间 2023-09-19 18:46:05作者: KINGBASE研究院

一、初始化测试环境

# 数据库版本信息
KingbaseES V008R006C007B0012 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-46), 64-bit

1. 创建分区表:

create table tb(id bigint,stat date,no bigint,pdate date,info varchar2(50)) partition by range(pdate) INTERVAL ('1 MONTH'::INTERVAL)
(
    PARTITION tb_p0 VALUES LESS THAN ('2019-01-01'),
    PARTITION tb_p1 VALUES LESS THAN ('2019-02-01'),
    PARTITION tb_p2 VALUES LESS THAN ('2019-03-01'),
    PARTITION tb_p3 VALUES LESS THAN ('2019-04-01'),
    PARTITION tb_p4 VALUES LESS THAN ('2019-05-01')
);

2. 创建随机日期、数字函数:

# 随机数函数
CREATE OR REPLACE FUNCTION RANDOM_NUMBER(INTEGER, INTEGER) RETURNS INTEGER AS
$BODY$
DECLARE
    START_INT ALIAS FOR $1;
    END_INT ALIAS FOR $2;
BEGIN
    RETURN trunc(random() * (END_INT - START_INT + 1) + START_INT);
END;
$BODY$
LANGUAGE plpgsql;

# 随机日期函数
CREATE OR REPLACE FUNCTION RANDOM_DAYS(START_DATE DATE, END_DATE DATE) RETURNS DATE AS
$BODY$
DECLARE
    INTERVAL_DAYS INTEGER;
    RANDOM_DAYS INTEGER;
    RANDOM_DATE DATE;
BEGIN
    INTERVAL_DAYS := END_DATE - START_DATE;
    RANDOM_DAYS := RANDOM_NUMBER(0, INTERVAL_DAYS);
    RANDOM_DATE := START_DATE + RANDOM_DAYS;
    RETURN RANDOM_DATE;
END;
$BODY$
LANGUAGE plpgsql;

3. insert生成数据:

test=# insert into tb select generate_series(1,100000000),clock_timestamp(),RANDOM_NUMBER(1,100000000),RANDOM_DAYS('2019-01-01'::date,'2019-05-31'::date),md5(random()::text) from dual;

INSERT 0 100000000
Time: 2081789.125 ms (34:41.789)
test=# select sys_size_pretty(sys_relation_size('tb_tb_p0'));
 sys_size_pretty 
-----------------
 0 bytes
(1 row)
Time: 36.671 ms
test=# select sys_size_pretty(sys_relation_size('tb_tb_p1'));
 sys_size_pretty 
-----------------
 1980 MB
(1 row)
Time: 0.648 ms
test=# select sys_size_pretty(sys_relation_size('tb_tb_p2'));
 sys_size_pretty 
-----------------
 1788 MB
(1 row)
Time: 0.591 ms
test=# select sys_size_pretty(sys_relation_size('tb_tb_p3'));
 sys_size_pretty 
-----------------
 1981 MB
(1 row)
Time: 0.551 ms
test=# select sys_size_pretty(sys_relation_size('tb_tb_p4'));
 sys_size_pretty 
-----------------
 1916 MB
(1 row)
Time: 0.674 ms
test=# select sys_size_pretty(sys_relation_size('tb_p5'));   
 sys_size_pretty 
-----------------
 1980 MB
(1 row)
Time: 0.668 ms

4. 添加主键:

test=# alter table tb add constraint tb_pk primary key(id,no);                                                                                                                                             
ALTER TABLE
Time: 194581.732 ms (03:14.582)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

5. 创建索引:

# 创建local本地索引
test=# create index on tb(id,stat,no,info);                                                                                                                                                                
CREATE INDEX
Time: 248144.207 ms (04:08.144)

test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_id_stat_no_info_idx" btree (id, stat, no, info)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

test=# \d+ tb_tb_p2
                                           Table "public.tb_tb_p2"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-02-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-03-01 00:00:00'::date)))
Indexes:
    "tb_tb_p2_id_stat_no_info_idx" btree (id, stat, no, info)
Access method: heap

# 使用global创建全局索引
# 查询表信息,发现使用global声明创建的索引也是local本地索引

test=# create index on tb(id,stat) global;  
CREATE INDEX
Time: 151558.143 ms (02:31.558)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_id_stat_idx" btree (id, stat)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

test=# \d+ tb_tb_p4
                                           Table "public.tb_tb_p4"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-04-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-05-01 00:00:00'::date)))
Indexes:
    "tb_tb_p4_id_stat_idx" btree (id, stat)
Access method: heap

# 使用unique加分区键pdate创建全局索引(global声明)
# 查询表信息,创建的索引依然是local本地索引

test=# create unique index ON tb(id,pdate) global;                                                                                                                                                          
CREATE INDEX
Time: 87232.640 ms (01:27.233)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           |          |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_id_pdate_idx" UNIQUE, btree (id, pdate)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

# 使用unique加非分区键创建全局索引(global声明)
# 查询表信息,全局索引创建成功

test=# create unique index ON tb(id,info) global;
CREATE INDEX
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           |          |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_id_info_idx" UNIQUE, btree (id, info) INCLUDE (tableoid) GLOBAL 
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

总结:

1 添加主键的同时会创建主键列(字段)唯一索引(但是有唯一索引的列不一定是主键)。

2 主键字段不允许空值,添加主键过程中会自动添加not null非空约束,保证主键列值的唯一性。

3 分区表添加主键同时会在主键列创建一个全局索引(索引有GLOBAL。

4 在分区表创建索引(默认为local本地索引,全局索引需要global声明),会自动在每个分区上创建一个本地索引。

5 分区表唯一约束必须包含分区键。

6 分区表成功创建全局索引必须满足条件:索引类型是唯一索引(unique)并且不包含分区键 。

二、通用方法给分区表添加主键、创建索引

1. 设置主键字段非空约束:

最好先设置主键字段非空not null约束,然后再添加主键。

test=# alter table tb alter column id set not null;
ALTER TABLE
Time: 37.610 ms
test=# alter table tb alter column no set not null;
ALTER TABLE
Time: 3.629 ms
test=# alter table tb add constraint tb_pk primary key(id,no);
ALTER TABLE
Time: 161859.979 ms (02:41.860)

test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

2. 设置maintenance_work_mem参数:

test=# set work_mem ='100MB';                                                         
SET
test=# set maintenance_work_mem ='500MB';
SET
test=# set temp_buffers ='100MB';
SET

# 然后设置非空约束not null添加主键
test=# alter table tb alter column id set not null;
ALTER TABLE
Time: 20.714 ms
test=# alter table tb alter column no set not null;
ALTER TABLE
Time: 2.202 ms
test=# alter table tb add constraint tb_pk primary key(id,no);
ALTER TABLE
Time: 170640.065 ms (02:50.640)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

3. 创建索引:

# 创建本地local索引
test=# create index on tb(id,stat,no,info);                                                                                                                                                                
CREATE INDEX
Time: 250377.917 ms (04:10.378)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_id_stat_no_info_idx" btree (id, stat, no, info)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

# 使用global创建全局索引
test=# create index on tb(id,stat) global; 
CREATE INDEX
Time: 160475.453 ms (02:40.475)
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_id_stat_idx" btree (id, stat)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

maintenance_work_mem参数是数据库在进行维护性操作( VACUUM 、 CREATE INDEX 和 ALTER TABLE ADD FOREIGN KEY )中使用的最大的内存量。

参数作用有以下几点:

  • 影响维护性操作的速度和效率,如果设置得太低,可能导致多次索引扫描或内存溢出到磁盘。
  • 影响自动垃圾回收(autovacuum)的触发和执行,如果设置得太高,可能导致内存不足或垃圾回收延迟。
  • 可以通过SET命令在本地会话中临时增加,以适应不同的维护性操作的需求。

分区表不支持以下添加主键语法:

create unique index tb_idx on tb(id);
alter table tb ADD CONSTRAINT tb_pk PRIMARY KEY using index tb_idx;

三、建议方法

使用on only关键字创建索引:

优点:

  • 创建索引时不会阻塞对分区表的并发插入、更新或者删除(INSERT,UPDATE,DELETE)操作。
  • 可以减少对分区表被锁定的时间。

缺点:

  • 分区表父表不支持CONCURRENTLY、parallel_workers选项,子分区支持CONCURRENTLY、parallel_workers选项。
  • 需要更多的工作量、花费较多的时间才能完成。

1.使用on only创建失效invalid索引,使所有的子分区不会自动应用该索引:

test=# create index tb_idx on only tb(id,stat,no,info);
CREATE INDEX
Time: 21.135 ms
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_idx" btree (id, stat, no, info) INVALID
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

2.然后手动创建分区表子分区索引:

# 手动给分区表子分区创建索引
test=# create index CONCURRENTLY tb_tb_p0_idx on tb_tb_p0(id,stat,no,info);
CREATE INDEX
Time: 5.469 ms
test=# create index CONCURRENTLY tb_tb_p1_idx on tb_tb_p1(id,stat,no,info);
CREATE INDEX
Time: 50423.600 ms (00:50.424)
test=# create index CONCURRENTLY tb_tb_p2_idx on tb_tb_p2(id,stat,no,info);
CREATE INDEX
Time: 46672.063 ms (00:46.672)
test=# create index CONCURRENTLY tb_tb_p3_idx on tb_tb_p3(id,stat,no,info);
CREATE INDEX
Time: 50621.904 ms (00:50.622)
test=# create index CONCURRENTLY tb_tb_p4_idx on tb_tb_p4(id,stat,no,info);
CREATE INDEX
Time: 49248.159 ms (00:49.248)
test=# create index CONCURRENTLY tb_p5_idx on tb_p5(id,stat,no,info);
CREATE INDEX
Time: 52901.838 ms (00:52.902)

# 然后使用ALTER INDEX .. ATTACH PARTITION到父索引
test=# alter index tb_idx attach partition tb_tb_p0_idx;
ALTER INDEX
Time: 5.326 ms
test=# alter index tb_idx attach partition tb_tb_p1_idx;
ALTER INDEX
Time: 2.688 ms
test=# alter index tb_idx attach partition tb_tb_p2_idx;
ALTER INDEX
Time: 2.908 ms
test=# alter index tb_idx attach partition tb_tb_p3_idx;
ALTER INDEX
Time: 3.608 ms
test=# alter index tb_idx attach partition tb_tb_p4_idx;
ALTER INDEX
Time: 2.332 ms
test=# alter index tb_idx attach partition tb_p5_idx;
ALTER INDEX
Time: 37.518 ms

# 所有的子分区索引都附加到父索引后,父索引将自动标记为有效
test=# \d+ tb
                                        Partitioned table "public.tb"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb_pk" PRIMARY KEY, btree (id, no) INCLUDE (tableoid) GLOBAL 
    "tb_idx" btree (id, stat, no, info)
Partitions: tb_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-06-01 00:00:00'),
            tb_tb_p0 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb_tb_p1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb_tb_p2 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb_tb_p3 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb_tb_p4 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-05-01 00:00:00')

test=# \d+ tb_tb_p3
                                           Table "public.tb_tb_p3"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           | not null |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-03-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-04-01 00:00:00'::date)))
Indexes:
    "tb_tb_p3_idx" btree (id, stat, no, info)
Access method: heap

3.总结:

分区表父表不支持CONCURRENTLY、parallel_workers选项,但是子分区支持CONCURRENTLY、parallel_workers选项。

  • 使用on only方式在父表创建失效的索引。
  • 子分区使用CONCURRENTLY选项,手动创建子分区索引。
  • 然后使用ALTER INDEX .. ATTACH PARTITION到父索引,父索引会自动标记为有效。

四、分区表创建索引建议:

1.设置maintenance_work_mem参数:

通过在会话级别(session)设置maintenance_work_mem可以提升创建索引的效率。例如(set maintenance_work_mem='1024MB')

2.使用on only关键字+并发(CONCURRENTLY)进行分区表索引的创建:

优点:在创建索引时不阻塞DML操作,在生产环境进行分区表创建索引不会影响到业务。

缺点:会带来额外 CPU 和 I/O 开销,可能会拖慢其他操作。死锁或者违反唯一约束索引创建将会失败(INVALID),重新create 或者reindex就可以。

并发创建索引的过程可以分为三个阶段:第一阶段是创建无效的索引并加锁,第二阶段是扫描表中的所有可见元组构建索引并更新标志位,第三阶段是为缺少的元组补充索引并等待所有早于当前快照的事务结束。