MongoDB副本集故障测试和解决方案

一、环境

$ cat /etc/redhat-release 
CentOS Linux release 7.0.1406 (Core) 
$ uname -a
Linux zhaopin-2-201 3.10.0-123.el7.x86_64 #1 SMP Mon Jun 30 12:09:22 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:00:05.507Z"),
        "myState" : 1,
        "members" : [ 
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 83, 
                        "optime" : Timestamp(1443423600, 1), 
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "electionTime" : Timestamp(1443423535, 2), 
                        "electionDate" : ISODate("2015-09-28T06:58:55Z"),
                        "configVersion" : 3,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 44, 
                        "optime" : Timestamp(1443423600, 1), 
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:00:04.918Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:05.042Z"),
                        "pingMs" : 0,
                        "syncingTo" : "172.30.2.201:27017",
                        "configVersion" : 3 
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 1,
                        "state" : 5,
                        "stateStr" : "STARTUP2",
                        "uptime" : 4,
                        "optime" : Timestamp(0, 0), 
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:00:04.918Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:04.926Z"),
                        "pingMs" : 0,
                        "configVersion" : 3 
                }
        ],
        "ok" : 1 
}

二、单节点故障

1.primary节点故障

1）关闭primary节点

rs0:PRIMARY> use admin;
switched to db admin
rs0:PRIMARY> db.shutdownServer();
2015-09-28T15:00:51.828+0800 I NETWORK  DBClientCursor::init call() failed
server should be down...
2015-09-28T15:00:51.830+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:00:51.831+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) ok
2015-09-28T15:00:51.831+0800 I NETWORK  DBClientCursor::init call() failed
>
bye

2）查看集群状态

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:01:28.818Z"),
        "myState" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:01:27.006Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:50.935Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.201:27017; couldn't connect to server 172.30.2.201:27017 (172.30.2.201), connection attempt failed",
                        "configVersion" : -1
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 87,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:01:26.963Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:01:27.078Z"),
                        "pingMs" : 0,
                        "electionTime" : Timestamp(1443423653, 1),
                        "electionDate" : ISODate("2015-09-28T07:00:53Z"),
                        "configVersion" : 3
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 90,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "configVersion" : 3,
                        "self" : true
                }
        ],
        "ok" : 1
}

发现集群进行了自动切换，把172.30.2.203:27017变为了primary

3）启动原来的primary

$ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/db0/mongodb.conf
about to fork child process, waiting until server is ready for connections.
forked process: 25738
child process started successfully, parent exiting
$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:02:24.312Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 13,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:02:23.189Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:02:22.873Z"),
                        "pingMs" : 0,
                        "configVersion" : 3
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 185,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "electionTime" : Timestamp(1443423653, 1),
                        "electionDate" : ISODate("2015-09-28T07:00:53Z"),
                        "configVersion" : 3,
                        "self" : true
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 143,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:02:23.103Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:02:22.990Z"),
                        "pingMs" : 0,
                        "configVersion" : 3
                }
        ],
        "ok" : 1
}

发现原来的primary自动切为了secondary

2.secondary节点故障

1）关闭secondary节点

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> use admin;
switched to db admin
rs0:SECONDARY> db.shutdownServer();
2015-09-28T15:04:39.064+0800 I NETWORK  DBClientCursor::init call() failed
server should be down...
2015-09-28T15:04:39.066+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:04:39.067+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:04:39.067+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
2015-09-28T15:04:39.070+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:04:39.070+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:04:39.070+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
>
bye

2）查看集群状态

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:05:12.140Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 180,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:05:11.265Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:05:10.951Z"),
                        "pingMs" : 0,
                        "configVersion" : 3
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 353,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "electionTime" : Timestamp(1443423653, 1),
                        "electionDate" : ISODate("2015-09-28T07:00:53Z"),
                        "configVersion" : 3,
                        "self" : true
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:05:11.226Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:04:37.055Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed",
                        "configVersion" : -1
                }
        ],
        "ok" : 1
}

可见单个secondary节点故障对集群没有影响

3）再启动secondary

$ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/db0/mongodb.conf
about to fork child process, waiting until server is ready for connections.
forked process: 49507
child process started successfully, parent exiting
$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:06:41.733Z"),
        "myState" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 12,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:06:40.999Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:06:41.233Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "could not find member to sync from",
                        "configVersion" : 3
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 12,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:06:40.999Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:06:41.360Z"),
                        "pingMs" : 0,
                        "electionTime" : Timestamp(1443423653, 1),
                        "electionDate" : ISODate("2015-09-28T07:00:53Z"),
                        "configVersion" : 3
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 13,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "configVersion" : 3,
                        "self" : true
                }
        ],
        "ok" : 1
}

重新启动后又重新连上了集群

三、多节点故障

1.primary和secondary节点同时故障

1）停掉一个secondary节点

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> use admin;
switched to db admin
rs0:SECONDARY> db.shutdownServer();
2015-09-28T15:10:43.049+0800 I NETWORK  DBClientCursor::init call() failed
server should be down...
2015-09-28T15:10:43.051+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:43.052+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:10:43.052+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
2015-09-28T15:10:43.055+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:43.055+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:10:43.055+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed

2）停掉primary节点

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:PRIMARY> use admin;
switched to db admin
rs0:PRIMARY> db.shutdownServer();
2015-09-28T15:10:53.069+0800 I NETWORK  DBClientCursor::init call() failed
server should be down...
2015-09-28T15:10:53.072+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:53.073+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) ok
2015-09-28T15:10:53.073+0800 I NETWORK  DBClientCursor::init call() failed
2015-09-28T15:10:53.076+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:53.076+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) ok
2015-09-28T15:10:53.888+0800 I NETWORK  Socket recv() errno:104 Connection reset by peer 127.0.0.1:27017
2015-09-28T15:10:53.888+0800 I NETWORK  SocketException: remote: 127.0.0.1:27017 error: 9001 socket exception [RECV_ERROR] server [127.0.0.1:27017]
2015-09-28T15:10:53.888+0800 I NETWORK  DBClientCursor::init call() failed

3）查看集群状态

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:12:10.946Z"),
        "myState" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 600,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "configVersion" : 3,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.203:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:12:10.008Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:10:51.422Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.203:27017; couldn't connect to server 172.30.2.203:27017 (172.30.2.203), connection attempt failed",
                        "configVersion" : -1
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.202:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:12:09.477Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:10:41.112Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed",
                        "configVersion" : -1
                }
        ],
        "ok" : 1
}

只剩下一个secondary节点，由于存活节点数（1/3）达不到副本集成员的多数，无法选举出primary，集群变得不可用了（无法写入）

4）解决方案

重新配置（注意：force:true 的强制重新配置只应在多数节点不可用的紧急情况下使用，可能导致已提交的写入被回滚）：

rs0:SECONDARY> cfg={_id:"rs0", members:[ {_id:0,host:"172.30.2.201:27017"}] }
{
        "_id" : "rs0",
        "members" : [
                {
                        "_id" : 0,
                        "host" : "172.30.2.201:27017"
                }
        ]
}
rs0:SECONDARY> rs.reconfig(cfg, {force:true});
{ "ok" : 1 }
rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:14:09.350Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 719,
                        "optime" : Timestamp(1443423600, 1),
                        "optimeDate" : ISODate("2015-09-28T07:00:00Z"),
                        "electionTime" : Timestamp(1443424428, 1),
                        "electionDate" : ISODate("2015-09-28T07:13:48Z"),
                        "configVersion" : 71840,
                        "self" : true
                }
        ],
        "ok" : 1
}

此时就变成了单primary节点，可以提供读写服务，之后再通过rs.add()把其他节点重新添加为secondary节点

2.两个secondary节点故障

1）故障前状态

rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:16:06.571Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 836,
                        "optime" : Timestamp(1443424538, 1),
                        "optimeDate" : ISODate("2015-09-28T07:15:38Z"),
                        "electionTime" : Timestamp(1443424534, 1),
                        "electionDate" : ISODate("2015-09-28T07:15:34Z"),
                        "configVersion" : 71842,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.202:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 31,
                        "optime" : Timestamp(1443424538, 1),
                        "optimeDate" : ISODate("2015-09-28T07:15:38Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:16:06.230Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:16:06.089Z"),
                        "pingMs" : 0,
                        "configVersion" : 71842
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.203:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 26,
                        "optime" : Timestamp(1443424538, 1),
                        "optimeDate" : ISODate("2015-09-28T07:15:38Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:16:06.229Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:16:06.233Z"),
                        "pingMs" : 0,
                        "configVersion" : 71842
                }
        ],
        "ok" : 1
}

2）停掉两个secondary节点

在两个secondary节点分别执行：

$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:SECONDARY> use admin;
switched to db admin
rs0:SECONDARY> db.shutdownServer();
2015-09-28T15:18:11.114+0800 I NETWORK  DBClientCursor::init call() failed
server should be down...
2015-09-28T15:18:11.117+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:18:11.118+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:18:11.118+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
2015-09-28T15:18:11.121+0800 I NETWORK  trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:18:11.121+0800 W NETWORK  Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused
2015-09-28T15:18:11.121+0800 I NETWORK  reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
>
bye

3）查看集群状态

rs0:SECONDARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:19:09.196Z"),
        "myState" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 1019,
                        "optime" : Timestamp(1443424538, 1),
                        "optimeDate" : ISODate("2015-09-28T07:15:38Z"),
                        "configVersion" : 71842,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "172.30.2.202:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:19:08.371Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:18:10.147Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed",
                        "configVersion" : -1
                },
                {
                        "_id" : 2,
                        "name" : "172.30.2.203:27017",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : Timestamp(0, 0),
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2015-09-28T07:19:08.350Z"),
                        "lastHeartbeatRecv" : ISODate("2015-09-28T07:18:34.298Z"),
                        "pingMs" : 0,
                        "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.203:27017; couldn't connect to server 172.30.2.203:27017 (172.30.2.203), connection attempt failed",
                        "configVersion" : -1
                }
        ],
        "ok" : 1
}

可见剩下的primary节点因为无法与副本集的多数成员通信，自动降级为了secondary节点，集群变得不可用了（无法写入）

4）解决方案

rs0:SECONDARY> cfg={_id:"rs0", members:[ {_id:0,host:"172.30.2.201:27017"}] }
{
        "_id" : "rs0",
        "members" : [
                {
                        "_id" : 0,
                        "host" : "172.30.2.201:27017"
                }
        ]
}
rs0:SECONDARY> rs.reconfig(cfg, {force:true});
{ "ok" : 1 }
rs0:PRIMARY> rs.status();
{
        "set" : "rs0",
        "date" : ISODate("2015-09-28T07:20:08.099Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "172.30.2.201:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 1078,
                        "optime" : Timestamp(1443424538, 1),
                        "optimeDate" : ISODate("2015-09-28T07:15:38Z"),
                        "electionTime" : Timestamp(1443424795, 1),
                        "electionDate" : ISODate("2015-09-28T07:19:55Z"),
                        "configVersion" : 127342,
                        "self" : true
                }
        ],
        "ok" : 1
}

处理方法和上面的相同，也是强制将剩下的secondary节点配置为单primary节点