KingbaseES分区表一声明式创建自动分区表

发布时间 2023-04-06 11:08:48作者: nwwhile

KingbaseES分区表一声明式创建自动分区表

一、声明式创建分区:

1. 创建分区表同时创建分区:

1.1 准备环境:

# 创建分区表同时创建分区
# 使用INTERVAL ('1 MONTH'::INTERVAL)指定按月自动添加分区
create table tb1(id bigint,stat date,no bigint,pdate date,info varchar2(50)) partition by range(pdate) INTERVAL ('1 MONTH'::INTERVAL)
(
PARTITION tb1_p1 VALUES LESS THAN ('2019-01-01'),
PARTITION tb1_p2 VALUES LESS THAN ('2019-02-01'),
PARTITION tb1_p3 VALUES LESS THAN ('2019-03-01'),
PARTITION tb1_p4 VALUES LESS THAN ('2019-04-01')
);

# 查看分区表父表
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           |          |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00')

# 查看分区表子表
test=# \d+ tb1_tb1_p1
                                          Table "public.tb1_tb1_p1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           |          |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00')
Partition constraint: ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-01-01 00:00:00'::date))
Access method: heap

1.2 对分区表添加主键:

# 分区表添加主键
test=# alter table tb1 add constraint tb1_pk primary key(id);
ALTER TABLE

# 查看分区表父表
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00')
            
# 查看分区表子表
test=# \d+ tb1_tb1_p1
                                          Table "public.tb1_tb1_p1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00')
Partition constraint: ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-01-01 00:00:00'::date))
Access method: heap

# 查询user_indexes视图tb1_pk索引
test=# select index_name,index_type,table_name,table_type,uniqueness,compression from user_indexes where index_name =upper('tb1_pk');
 index_name | index_type | table_name | table_type | uniqueness | compression 
------------+------------+------------+------------+------------+-------------
 TB1_PK     | BTREE      | TB1        | TABLE      | UNIQUE     | DISABLED
(1 row)

分区表添加主键:

1 添加主键的同时会创建主键列(字段)唯一索引(但是有唯一索引的列不一定是主键)。

2 主键字段不允许空值,添加主键过程中会自动添加not null非空约束,保证主键列值的唯一性。

3 分区表添加主键同时创建的索引(索引有GLOBAL)是全局索引。

4 分区表会在主键列创建一个全局(global)索引,默认为添加主键列的同时创建全局索引。

5 分区表唯一约束必须包含分区键。

1.3 对分区表创建索引:

# 分区表创建索引
create index on tb1 (no) local;
CREATE INDEX
create index on tb1 (id,no) global;
CREATE INDEX

# 查看tb1表信息
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
    "tb1_id_no_idx" btree (id, no)
    "tb1_no_idx" btree (no)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00')
            
# 查看分区表tb1子表信息
test=# \d+ tb1_tb1_p2
                                          Table "public.tb1_tb1_p2"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb1 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-01-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-02-01 00:00:00'::date)))
Indexes:
    "tb1_tb1_p2_id_no_idx" btree (id, no)
    "tb1_tb1_p2_no_idx" btree (no)
Access method: heap

分区表创建索引:

1 在分区表创建本地索引,会自动在每个分区上创建一个本地索引。

2 分区表只能在主键列创建一个全局(global)索引,默认为添加主键列创建的索引。

3 分区表创建全局索引必须满足条件:索引类型是唯一索引(unique)并且不包含分区键 。

4 分区表父表不支持CONCURRENTLY、parallel_workers选项,子分区支持CONCURRENTLY、parallel_workers选项。

1.4 使用ATTACH PARTITION将普通表转换为分区表子分区:

# 创建普通表
test=# create table tb1_tb1_p5(id bigint,stat date,no bigint,pdate date,info varchar2(50));
CREATE TABLE

test=# \d+ tb1_tb1_p5
                                          Table "public.tb1_tb1_p5"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           |          |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Access method: heap

# 使用ATTACH PARTITION将普通表转换为分区表子分区
test=# alter table tb1 ATTACH PARTITION tb1_tb1_p5 for VALUES FROM('2019-05-01') TO ('2019-05-31');
ERROR:  column "id" in child table must be marked NOT NULL

# 创建的普通表,表结构、约束必须跟分区表一致
test=# alter table tb1_tb1_p5 alter id set not null;
ALTER TABLE

test=# \d+ tb1_tb1_p5                               
                                          Table "public.tb1_tb1_p5"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Access method: heap

test=# alter table tb1 ATTACH PARTITION tb1_tb1_p5 for VALUES FROM('2019-05-01') TO ('2019-05-31');
ALTER TABLE

# 查看ATTACH后的分区表
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
    "tb1_id_no_idx" btree (id, no)
    "tb1_no_idx" btree (no)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb1_tb1_p5 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-05-31 00:00:00')
            
test=# \d+ tb1_tb1_p5
                                          Table "public.tb1_tb1_p5"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb1 FOR VALUES FROM ('2019-05-01 00:00:00') TO ('2019-05-31 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-05-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-05-31 00:00:00'::date)))
Indexes:
    "tb1_tb1_p5_id_no_idx" btree (id, no)
    "tb1_tb1_p5_no_idx" btree (no)
Access method: heap

ATTACH PARTITION将普通表转换为分区表子分区:

1 ATTACH普通表、分区表的列、字段类型、长度、约束必须一致。

2 分区表的unique和primary key约束将被应用在ATTACH新的子分区。

3 ATTACH过程中如果普通表有数据,会使用全表扫描检查数据是否违反分区约束(可以在ATTACH前使用约束筛选复合条件的数据)。

4 ATTACH外部表,不需要验证外部表中的数据符合分区约束。

5 如果ATTACH的表有跟分区不一致的索引,分区表会应用ATTACH表的索引。

2. 使用Create Table为分区表添加子分区:

使用Create Table语句创建分区表子分区也会自动添加约束及索引。

# 使用Create Table语句创建分区表子分区
test=# CREATE TABLE tb1_tb1_p5 PARTITION OF tb1 FOR VALUES FROM ('2019-04-01') TO ('2019-04-30');
CREATE TABLE

test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
    "tb1_id_no_idx" btree (id, no)
    "tb1_no_idx" btree (no)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb1_tb1_p5 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-04-30 00:00:00')

test=# \d+ tb1_tb1_p5
                                          Table "public.tb1_tb1_p5"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition of: tb1 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-04-30 00:00:00')
Partition constraint: (((pdate IS NULL) OR ((pdate)::timestamp without time zone >= '2019-04-01 00:00:00'::date)) AND ((pdate IS NOT NULL) AND ((pdate)::timestamp without time zone < '2019-04-30 00:00:00'::date)))
Indexes:
    "tb1_tb1_p5_id_no_idx" btree (id, no)
    "tb1_tb1_p5_no_idx" btree (no)
Access method: heap

3. 申明式创建分区总结:

  1. 声明式分区,子分区和分区表列、类型、约束必须一致。

  2. 在申明式创建的分区表上创建索引,会自动将索引应用于所有的子分区。

  3. 分区表惟一约束必须包括分区键。

  4. 不能创建包含所有子分区的排除约束,只能每个子分区单独创建。

  5. 在分区表创建索引时(不可使用CONCURRENTLY),可使用on only在分区表创建标记失效的索引,避免大表创建索引耗时太久(子分区不会自动应用该索引),然后在所有子分区单独创建索引(可使用CONCURRENTLY),最后使用ALTER INDEX .. ATTACH PARTITION附加到到父索引,所有子分区索引附加到父索引后会自动标记为有效。

# 分区表不支持使用CONCURRENTLY在父表创建索引
test=# create index CONCURRENTLY on tb1(info);
ERROR:  cannot create index on partitioned table "tb1" concurrently
Time: 0.519 ms

# 使用on only在分区表创建索引
test=# create index on only tb1(info);    
CREATE INDEX
Time: 1.845 ms

# 查看分区表tb1信息,tb1_info_idx标记为无效invalid
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
    "tb1_id_no_idx" btree (id, no)
    "tb1_info_idx" btree (info) INVALID
    "tb1_no_idx" btree (no)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb1_tb1_p5 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-04-30 00:00:00')

# 单独创建所有子分区索引
test=# create index tb1_tb1_p1_info_idx on tb1_tb1_p1(info);
CREATE INDEX
test=# create index tb1_tb1_p2_info_idx on tb1_tb1_p2(info);
CREATE INDEX
test=# create index tb1_tb1_p3_info_idx on tb1_tb1_p3(info);
CREATE INDEX
test=# create index tb1_tb1_p4_info_idx on tb1_tb1_p4(info);
CREATE INDEX
test=# create index tb1_tb1_p5_info_idx on tb1_tb1_p5(info);
CREATE INDEX

# 使用attach partition将所有子分区索引附加到父表
test=# alter index tb1_info_idx attach partition tb1_tb1_p1_info_idx;
ALTER INDEX
test=# alter index tb1_info_idx attach partition tb1_tb1_p2_info_idx;
ALTER INDEX
test=# alter index tb1_info_idx attach partition tb1_tb1_p3_info_idx;
ALTER INDEX
test=# alter index tb1_info_idx attach partition tb1_tb1_p4_info_idx;
ALTER INDEX
test=# alter index tb1_info_idx attach partition tb1_tb1_p5_info_idx;
ALTER INDEX

# 查看分区表tb1信息,tb1_info_idx自动标记为有效
test=# \d+ tb1
                                        Partitioned table "public.tb1"
 Column |            Type            | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+----------------------------+-----------+----------+---------+----------+--------------+-------------
 id     | bigint                     |           | not null |         | plain    |              | 
 stat   | date                       |           |          |         | plain    |              | 
 no     | bigint                     |           |          |         | plain    |              | 
 pdate  | date                       |           |          |         | plain    |              | 
 info   | character varying(50 char) |           |          |         | extended |              | 
Partition key: RANGE (pdate)
Range interval: INTERVAL ('0-1'::pg_catalog.interval)
Indexes:
    "tb1_pk" PRIMARY KEY, btree (id) INCLUDE (tableoid) GLOBAL 
    "tb1_id_no_idx" btree (id, no)
    "tb1_info_idx" btree (info)
    "tb1_no_idx" btree (no)
Partitions: tb1_tb1_p1 FOR VALUES FROM (MINVALUE) TO ('2019-01-01 00:00:00'),
            tb1_tb1_p2 FOR VALUES FROM ('2019-01-01 00:00:00') TO ('2019-02-01 00:00:00'),
            tb1_tb1_p3 FOR VALUES FROM ('2019-02-01 00:00:00') TO ('2019-03-01 00:00:00'),
            tb1_tb1_p4 FOR VALUES FROM ('2019-03-01 00:00:00') TO ('2019-04-01 00:00:00'),
            tb1_tb1_p5 FOR VALUES FROM ('2019-04-01 00:00:00') TO ('2019-04-30 00:00:00')