MySQL DML 闪回之 binlog2sql

发布时间 2023-07-31 15:30:55作者: 201432273

一直以来,由于 DBA 的误操作或者业务bug,导致误删数据的情况都时有发生。当出现误删数据的情况时,从线上操作日志构造误删除的数据,或者DBA使用binlog和备份的方式恢复数据,不管哪种,都非常费时费力,并且容易

出错。可能有的同学会说从 从库恢复,但实时主从备份只能防止硬件问题,比如主库的硬盘损坏。但对于误操作,则无能为力。比如在主库误删一张表,或者一个update语句没有指定where条件,导致全表被更新。当操作被同步到从库上后,则主从都无能为力。

因此数据回滚需求就显得极为重要。这方面比较成熟的回滚工具是由美团点评开源的 binlog2sql 工具,本文主要记录回滚的操作流程。

binlog2sql 的核心原理: 解析 binlog 日志文件,将 insert 语句 改写成 delete 或者 delete 改为 insert;对于 update 操作, 将修改前的数据和修改后的数据互换,最终生成 回滚的 SQL 语句。

测试表:
mysql> show create table t1 \G;
*************************** 1. row ***************************
       Table: t1
Create Table: CREATE TABLE `t1` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `name` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `age` int(11) DEFAULT NULL,
  `hobby` varchar(30) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=121 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
1 row in set (0.00 sec)
 
ERROR:
No query specified
 
mysql> select * from t1;
+-----+---------+------+-----------+
| id  | name    | age  | hobby     |
+-----+---------+------+-----------+
| 101 | siri04  |   21 | game41    |
| 102 | siri05  |   22 | game42    |
| 103 | siri06  |   23 | game43    |
| 104 | siri07  |   24 | game44    |
| 105 | siri08  |   25 | game45    |
| 106 | siri09  |   26 | game46    |
| 107 | siri14  |   27 | game47    |
| 108 | siri24  |   28 | game48    |
| 109 | siri34  |   29 | game49    |
| 110 | siri44  |   30 | game50    |
| 111 | siri54  |   31 | game51    |
| 112 | siri64  |   32 | game52    |
| 113 | seine04 |   31 | running41 |
| 114 | seine05 |   32 | running42 |
| 115 | seine06 |   33 | running43 |
| 116 | seine07 |   34 | running44 |
| 117 | seine08 |   35 | running45 |
| 118 | seine09 |   36 | running46 |
| 119 | seine14 |   37 | running47 |
| 120 | seine24 |   38 | running48 |
+-----+---------+------+-----------+
20 rows in set (0.00 sec)

1. delete 操作回滚

# 由业务 bug 导致 误删 id >=112 的记录
mysql> delete from t1 where id >=112; 
Query OK, 9 rows affected (0.00 sec)
mysql> select * from t1;
+-----+--------+------+--------+
| id  | name   | age  | hobby  |
+-----+--------+------+--------+
| 101 | siri04 |   21 | game41 |
| 102 | siri05 |   22 | game42 |
| 103 | siri06 |   23 | game43 |
| 104 | siri07 |   24 | game44 |
| 105 | siri08 |   25 | game45 |
| 106 | siri09 |   26 | game46 |
| 107 | siri14 |   27 | game47 |
| 108 | siri24 |   28 | game48 |
| 109 | siri34 |   29 | game49 |
| 110 | siri44 |   30 | game50 |
| 111 | siri54 |   31 | game51 |
+-----+--------+------+--------+
11 rows in set (0.00 sec)
 
# 1. 根据误删的大致时间,从 最近的 binlog 文件中 解析出 原始 SQL 语句。根据位置信息,判断误操作 sql 属于同一个事务。
root@DRBREQ:# python3 binlog2sql/binlog2sql.py -h192.168.98.111 -P3306 -uroot -p'123456' -d d1 -t t1 --start-file='mysql-bin.000001' --start-datetime='2023-04-11 10:55:00' --stop-datetime='2023-04-11 10:58:00'
DELETE FROM `d1`.`t1` WHERE `id`=112 AND `name`='siri64' AND `age`=32 AND `hobby`='game52' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=113 AND `name`='seine04' AND `age`=31 AND `hobby`='running41' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=114 AND `name`='seine05' AND `age`=32 AND `hobby`='running42' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=115 AND `name`='seine06' AND `age`=33 AND `hobby`='running43' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=116 AND `name`='seine07' AND `age`=34 AND `hobby`='running44' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=117 AND `name`='seine08' AND `age`=35 AND `hobby`='running45' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=118 AND `name`='seine09' AND `age`=36 AND `hobby`='running46' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=119 AND `name`='seine14' AND `age`=37 AND `hobby`='running47' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
DELETE FROM `d1`.`t1` WHERE `id`=120 AND `name`='seine24' AND `age`=38 AND `hobby`='running48' LIMIT 1; #start 19811 end 20241 time 2023-04-11 10:56:57
 
# 2. 生成回滚的语句
root@DRBREQ:# python3 binlog2sql/binlog2sql.py -h192.168.98.111 -P3306 -uroot -p'123456' -d d1 -t t1 --start-file='mysql-bin.000001' --start-datetime='2023-04-11 10:55:00' --stop-datetime='2023-04-11 10:58:00' -B > back.sql
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (120, 'seine24', 38, 'running48'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (119, 'seine14', 37, 'running47'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (118, 'seine09', 36, 'running46'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (117, 'seine08', 35, 'running45'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (116, 'seine07', 34, 'running44'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (115, 'seine06', 33, 'running43'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (114, 'seine05', 32, 'running42'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (113, 'seine04', 31, 'running41'); #start 19811 end 20241 time 2023-04-11 10:56:57
INSERT INTO `d1`.`t1`(`id`, `name`, `age`, `hobby`) VALUES (112, 'siri64', 32, 'game52'); #start 19811 end 20241 time 2023-04-11 10:56:57
 
# 3. 回滚数据
root@DRBREQ:# mysql -h192.168.98.111 -P3306 -uroot -p'123456' < back.sql
 
# 4. 查看数据表,验证 delete 数据已回退
mysql> select * from t1;
+-----+---------+------+-----------+
| id  | name    | age  | hobby     |
+-----+---------+------+-----------+
| 101 | siri04  |   21 | game41    |
| 102 | siri05  |   22 | game42    |
| 103 | siri06  |   23 | game43    |
| 104 | siri07  |   24 | game44    |
| 105 | siri08  |   25 | game45    |
| 106 | siri09  |   26 | game46    |
| 107 | siri14  |   27 | game47    |
| 108 | siri24  |   28 | game48    |
| 109 | siri34  |   29 | game49    |
| 110 | siri44  |   30 | game50    |
| 111 | siri54  |   31 | game51    |
| 112 | siri64  |   32 | game52    |
| 113 | seine04 |   31 | running41 |
| 114 | seine05 |   32 | running42 |
| 115 | seine06 |   33 | running43 |
| 116 | seine07 |   34 | running44 |
| 117 | seine08 |   35 | running45 |
| 118 | seine09 |   36 | running46 |
| 119 | seine14 |   37 | running47 |
| 120 | seine24 |   38 | running48 |
+-----+---------+------+-----------+
20 rows in set (0.00 sec)

2. update 操作回滚

# 误操作修改了 id < 112 数据行
mysql> update t1 set name='radius' where id < 112;
Query OK, 11 rows affected (0.00 sec)
Rows matched: 11  Changed: 11  Warnings: 0
 
mysql> select * from t1;
+-----+---------+------+-----------+
| id  | name    | age  | hobby     |
+-----+---------+------+-----------+
| 101 | radius  |   21 | game41    |
| 102 | radius  |   22 | game42    |
| 103 | radius  |   23 | game43    |
| 104 | radius  |   24 | game44    |
| 105 | radius  |   25 | game45    |
| 106 | radius  |   26 | game46    |
| 107 | radius  |   27 | game47    |
| 108 | radius  |   28 | game48    |
| 109 | radius  |   29 | game49    |
| 110 | radius  |   30 | game50    |
| 111 | radius  |   31 | game51    |
| 112 | siri64  |   32 | game52    |
| 113 | seine04 |   31 | running41 |
| 114 | seine05 |   32 | running42 |
| 115 | seine06 |   33 | running43 |
| 116 | seine07 |   34 | running44 |
| 117 | seine08 |   35 | running45 |
| 118 | seine09 |   36 | running46 |
| 119 | seine14 |   37 | running47 |
| 120 | seine24 |   38 | running48 |
+-----+---------+------+-----------+
20 rows in set (0.00 sec)
 
# 1. 根据误删的大致时间,从 最近的 binlog 文件中 解析 原始 SQL 语句, 进行数据校验
root@DRBREQ:# python3 binlog2sql/binlog2sql.py -h192.168.98.111 -P3306 -uroot -p'123456' -d d1 -t t1 --start-file='mysql-bin.000001' --start-datetime='2023-04-11 11:10:00' --stop-datetime='2023-04-11 11:15:00'
UPDATE `d1`.`t1` SET `id`=101, `name`='radius', `age`=21, `hobby`='game41' WHERE `id`=101 AND `name`='siri04' AND `age`=21 AND `hobby`='game41' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=102, `name`='radius', `age`=22, `hobby`='game42' WHERE `id`=102 AND `name`='siri05' AND `age`=22 AND `hobby`='game42' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=103, `name`='radius', `age`=23, `hobby`='game43' WHERE `id`=103 AND `name`='siri06' AND `age`=23 AND `hobby`='game43' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=104, `name`='radius', `age`=24, `hobby`='game44' WHERE `id`=104 AND `name`='siri07' AND `age`=24 AND `hobby`='game44' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=105, `name`='radius', `age`=25, `hobby`='game45' WHERE `id`=105 AND `name`='siri08' AND `age`=25 AND `hobby`='game45' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=106, `name`='radius', `age`=26, `hobby`='game46' WHERE `id`=106 AND `name`='siri09' AND `age`=26 AND `hobby`='game46' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=107, `name`='radius', `age`=27, `hobby`='game47' WHERE `id`=107 AND `name`='siri14' AND `age`=27 AND `hobby`='game47' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=108, `name`='radius', `age`=28, `hobby`='game48' WHERE `id`=108 AND `name`='siri24' AND `age`=28 AND `hobby`='game48' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=109, `name`='radius', `age`=29, `hobby`='game49' WHERE `id`=109 AND `name`='siri34' AND `age`=29 AND `hobby`='game49' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=110, `name`='radius', `age`=30, `hobby`='game50' WHERE `id`=110 AND `name`='siri44' AND `age`=30 AND `hobby`='game50' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=111, `name`='radius', `age`=31, `hobby`='game51' WHERE `id`=111 AND `name`='siri54' AND `age`=31 AND `hobby`='game51' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
# 2. 生成回滚的语句。反转binlog 中 set 与 where 的位置
root@DRBREQ:# python3 binlog2sql/binlog2sql.py -h192.168.98.111 -P3306 -uroot -p'123456' -d d1 -t t1 --start-file='mysql-bin.000001' --start-datetime='2023-04-11 11:10:00' --stop-datetime='2023-04-11 11:15:00' -B > back.sql
UPDATE `d1`.`t1` SET `id`=111, `name`='siri54', `age`=31, `hobby`='game51' WHERE `id`=111 AND `name`='radius' AND `age`=31 AND `hobby`='game51' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=110, `name`='siri44', `age`=30, `hobby`='game50' WHERE `id`=110 AND `name`='radius' AND `age`=30 AND `hobby`='game50' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=109, `name`='siri34', `age`=29, `hobby`='game49' WHERE `id`=109 AND `name`='radius' AND `age`=29 AND `hobby`='game49' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=108, `name`='siri24', `age`=28, `hobby`='game48' WHERE `id`=108 AND `name`='radius' AND `age`=28 AND `hobby`='game48' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=107, `name`='siri14', `age`=27, `hobby`='game47' WHERE `id`=107 AND `name`='radius' AND `age`=27 AND `hobby`='game47' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=106, `name`='siri09', `age`=26, `hobby`='game46' WHERE `id`=106 AND `name`='radius' AND `age`=26 AND `hobby`='game46' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=105, `name`='siri08', `age`=25, `hobby`='game45' WHERE `id`=105 AND `name`='radius' AND `age`=25 AND `hobby`='game45' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=104, `name`='siri07', `age`=24, `hobby`='game44' WHERE `id`=104 AND `name`='radius' AND `age`=24 AND `hobby`='game44' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=103, `name`='siri06', `age`=23, `hobby`='game43' WHERE `id`=103 AND `name`='radius' AND `age`=23 AND `hobby`='game43' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=102, `name`='siri05', `age`=22, `hobby`='game42' WHERE `id`=102 AND `name`='radius' AND `age`=22 AND `hobby`='game42' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
UPDATE `d1`.`t1` SET `id`=101, `name`='siri04', `age`=21, `hobby`='game41' WHERE `id`=101 AND `name`='radius' AND `age`=21 AND `hobby`='game41' LIMIT 1; #start 22853 end 23603 time 2023-04-11 11:13:31
# 3. 回滚数据
root@DRBREQ:# mysql -h192.168.98.111 -P3306 -uroot -p'123456' < back.sql
# 4. 查看数据表,验证 update 数据已回退
mysql> select * from t1;
+-----+---------+------+-----------+
| id  | name    | age  | hobby     |
+-----+---------+------+-----------+
| 101 | siri04  |   21 | game41    |
| 102 | siri05  |   22 | game42    |
| 103 | siri06  |   23 | game43    |
| 104 | siri07  |   24 | game44    |
| 105 | siri08  |   25 | game45    |
| 106 | siri09  |   26 | game46    |
| 107 | siri14  |   27 | game47    |
| 108 | siri24  |   28 | game48    |
| 109 | siri34  |   29 | game49    |
| 110 | siri44  |   30 | game50    |
| 111 | siri54  |   31 | game51    |
| 112 | siri64  |   32 | game52    |
| 113 | seine04 |   31 | running41 |
| 114 | seine05 |   32 | running42 |
| 115 | seine06 |   33 | running43 |
| 116 | seine07 |   34 | running44 |
| 117 | seine08 |   35 | running45 |
| 118 | seine09 |   36 | running46 |
| 119 | seine14 |   37 | running47 |
| 120 | seine24 |   38 | running48 |
+-----+---------+------+-----------+
参考文档:

https://github.com/danfengcao/binlog2sql
https://tech.meituan.com/2017/11/17/mysql-flashback.html
https://github.com/liuhr/my2sql