mongodb从库无法启动一例(replication_recovery.cpp)

发布时间: 2023-07-17 09:15:00  作者: slnngk

环境:
OS:centos 7
mongodb:4.4.22

背景:
1主1从1仲裁的环境,修改从库的集群ip后,在主库上执行了如下操作,随后发现从库无法启动:
myrepl:PRIMARY> rs.remove("192.168.1.104:29001")
myrepl:PRIMARY> conf=rs.conf()
myrepl:PRIMARY> conf.members[1].host="192.168.1.107:29001"
myrepl:PRIMARY> rs.reconfig(conf,{"force":true})
myrepl:PRIMARY> rs.add({ host: "192.168.1.107:29001", priority: 0, votes: 0 })

 

报错信息:
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F",  "c":"REPL",     "id":40313,   "ctx":"initandlisten","msg":"Applied op oplogApplicationStartPoint not found","attr":{"oplogApplicationStartPoint":{"":{"$timestamp":{"t":1689326833,"i":1}}},"topOfOplog":{"":{"$timestamp":{"t":1689325501,"i":1}}}}}
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F",  "c":"-",        "id":23091,   "ctx":"initandlisten","msg":"Fatal assertion","attr":{"msgid":40313,"file":"src/mongo/db/repl/replication_recovery.cpp","line":632}}
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F",  "c":"-",        "id":23092,   "ctx":"initandlisten","msg":"\n\n***aborting after fassert() failure\n\n"}

 

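原因是oplog被覆盖了,找不到相应的数据(日志中恢复起点 oplogApplicationStartPoint 的时间戳为 1689326833,而本地 oplog 的顶端 topOfOplog 只到 1689325501,起点在本地 oplog 中不存在),需要重新初始化该节点。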

处理步骤:
将原data目录重命名备份(确认无误后可再删除),新建一个空的data目录,然后重新启动该节点:
[root@localhost mongodb]# cd /home/middle/mongodb
[root@localhost mongodb]# mv data bak_data
[root@localhost mongodb]# mkdir data

[root@localhost conf]# /usr/local/services/mongodb/bin/mongod -f /home/middle/mongodb/conf/mongo.cnf

这个时候查看集群状态

myrepl:PRIMARY> rs.status()
{
        "set" : "myrepl",
        "date" : ISODate("2023-07-17T00:54:53.232Z"),
        "myState" : 1,
        "term" : NumberLong(4),
        "syncSourceHost" : "",
        "syncSourceId" : -1,
        "heartbeatIntervalMillis" : NumberLong(2000),
        "majorityVoteCount" : 2,
        "writeMajorityCount" : 1,
        "votingMembersCount" : 2,
        "writableVotingMembersCount" : 1,
        "optimes" : {
                "lastCommittedOpTime" : {
                        "ts" : Timestamp(1689555291, 1),
                        "t" : NumberLong(4)
                },
                "lastCommittedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
                "readConcernMajorityOpTime" : {
                        "ts" : Timestamp(1689555291, 1),
                        "t" : NumberLong(4)
                },
                "readConcernMajorityWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
                "appliedOpTime" : {
                        "ts" : Timestamp(1689555291, 1),
                        "t" : NumberLong(4)
                },
                "durableOpTime" : {
                        "ts" : Timestamp(1689555291, 1),
                        "t" : NumberLong(4)
                },
                "lastAppliedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
                "lastDurableWallTime" : ISODate("2023-07-17T00:54:51.641Z")
        },
        "lastStableRecoveryTimestamp" : Timestamp(1689555251, 1),
        "electionCandidateMetrics" : {
                "lastElectionReason" : "electionTimeout",
                "lastElectionDate" : ISODate("2023-07-17T00:43:31.531Z"),
                "electionTerm" : NumberLong(4),
                "lastCommittedOpTimeAtElection" : {
                        "ts" : Timestamp(0, 0),
                        "t" : NumberLong(-1)
                },
                "lastSeenOpTimeAtElection" : {
                        "ts" : Timestamp(1689328811, 1),
                        "t" : NumberLong(3)
                },
                "numVotesNeeded" : 2,
                "priorityAtElection" : 1,
                "electionTimeoutMillis" : NumberLong(10000),
                "numCatchUpOps" : NumberLong(0),
                "newTermStartDate" : ISODate("2023-07-17T00:43:31.548Z"),
                "wMajorityWriteAvailabilityDate" : ISODate("2023-07-17T00:43:31.655Z")
        },
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.1.102:29001",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 695,
                        "optime" : {
                                "ts" : Timestamp(1689555291, 1),
                                "t" : NumberLong(4)
                        },
                        "optimeDate" : ISODate("2023-07-17T00:54:51Z"),
                        "lastAppliedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
                        "lastDurableWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
                        "syncSourceHost" : "",
                        "syncSourceId" : -1,
                        "infoMessage" : "",
                        "electionTime" : Timestamp(1689554611, 1),
                        "electionDate" : ISODate("2023-07-17T00:43:31Z"),
                        "configVersion" : 23819,
                        "configTerm" : -1,
                        "self" : true,
                        "lastHeartbeatMessage" : ""
                },
                {
                        "_id" : 3,
                        "name" : "192.168.1.105:29001",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 688,
                        "lastHeartbeat" : ISODate("2023-07-17T00:54:52.042Z"),
                        "lastHeartbeatRecv" : ISODate("2023-07-17T00:54:53.123Z"),
                        "pingMs" : NumberLong(1),
                        "lastHeartbeatMessage" : "",
                        "syncSourceHost" : "",
                        "syncSourceId" : -1,
                        "infoMessage" : "",
                        "configVersion" : 23819,
                        "configTerm" : -1
                },
                {
                        "_id" : 4,
                        "name" : "192.168.1.107:29001",
                        "health" : 1,
                        "state" : 5,
                        "stateStr" : "STARTUP2",
                        "uptime" : 61,
                        "optime" : {
                                "ts" : Timestamp(0, 0),
                                "t" : NumberLong(-1)
                        },
                        "optimeDurable" : {
                                "ts" : Timestamp(0, 0),
                                "t" : NumberLong(-1)
                        },
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastAppliedWallTime" : ISODate("1970-01-01T00:00:00Z"),
                        "lastDurableWallTime" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2023-07-17T00:54:52.518Z"),
                        "lastHeartbeatRecv" : ISODate("2023-07-17T00:54:52.412Z"),
                        "pingMs" : NumberLong(5),
                        "lastHeartbeatMessage" : "",
                        "syncSourceHost" : "192.168.1.102:29001",
                        "syncSourceId" : 0,
                        "infoMessage" : "",
                        "configVersion" : 23819,
                        "configTerm" : -1
                }
        ],
        "ok" : 1
}

 

目前该节点处于STARTUP2状态。
STARTUP2:节点在整个初始化同步(initial sync)过程中都处于这个状态。
同步数据的过程中,新节点的stateStr为STARTUP2,待同步完成后会变为SECONDARY。