Created
June 11, 2018 15:11
-
-
Save dav1x/d499011568db55e145de684ea6878a90 to your computer and use it in GitHub Desktop.
Majority etcd set failure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Inventory file:
# All etcd members.
[etcd]
stretch-master-0.stretch.e2e.bos.redhat.com
stretch-master-1.stretch.e2e.bos.redhat.com
stretch-master-2.stretch.e2e.bos.redhat.com

# Members stopped by the "Simulate DR for ETCD" play (two of the three above).
[etcd-pri]
stretch-master-0.stretch.e2e.bos.redhat.com
stretch-master-1.stretch.e2e.bos.redhat.com

# Member the DR playbook runs against (it targets etcd-sec[0]).
[etcd-sec]
stretch-master-2.stretch.e2e.bos.redhat.com
---
# Simulates the failure named in the task below by stopping the etcd unit on
# every host of the `etcd-pri` inventory group.
- name: Simulate DR for ETCD
  hosts: etcd-pri
  tasks:
    - name: Stop the etcd services from primary DC
      systemd:
        name: 'etcd'
        state: stopped
      # Stopping a system unit needs root; escalate the same way every task
      # of the DR playbook does, so the play also works for a non-root
      # connection user.
      become: True
[root@stretch-master-0 ~]# oc get node
Error from server (Timeout): the server was unable to return a response in the time allotted, but may still be processing the request (get nodes)
DR playbook:
---
# Disaster-recovery play, run against the first host of the `etcd-sec` group.
# If the local etcd member does not report "cluster is healthy", etcd is
# restarted once with ETCD_FORCE_NEW_CLUSTER=true in /etc/etcd/etcd.conf, and
# the flag is removed again afterwards so that it is not applied on later
# restarts.
- name: Ensure that we have a rw etcd
  hosts: etcd-sec[0]
  tasks:
    - name: Get the status of the etcd cluster
      # stdout is '1' when etcdctl prints the line "cluster is healthy" and
      # '0' otherwise. `grep -c` exits non-zero when it counts zero matches,
      # so `|| true` keeps the probe from failing the play in exactly the
      # situation the following tasks are meant to handle.
      shell: >-
        etcdctl -C https://{{ ansible_default_ipv4.address }}:2379
        --cert-file /etc/etcd/peer.crt
        --key-file /etc/etcd/peer.key
        --ca-file /etc/etcd/ca.crt
        cluster-health
        | grep -c "^cluster is healthy$" || true
      become: True
      register: etcd_healthy_nodes
      # Read-only query: never report it as a change.
      changed_when: False

    # The `when` and `become` below are applied to every task in the block.
    - name: Force a new single-member cluster when etcd is not healthy
      when: etcd_healthy_nodes.stdout == '0'
      become: True
      block:
        # NOTE(review): nothing in this play writes a drop-in into this
        # directory, and it is named after `etcd_container` while the unit
        # managed below is `etcd` -- confirm it is still required.
        - name: Ensure we have the configuration folder
          file:
            name: '/etc/systemd/system/etcd_container.service.d'
            state: directory

        - name: If we don't have the quorum put the cluster in surviving mode
          lineinfile:
            dest: /etc/etcd/etcd.conf
            regexp: '^ETCD_FORCE_NEW_CLUSTER='
            line: ETCD_FORCE_NEW_CLUSTER=true

        - name: Restart the etcd service in survive mode
          systemd:
            name: 'etcd'
            state: restarted
            daemon_reload: True

        # Only the config file is cleaned up here; the running etcd is not
        # restarted again by this play.
        - name: Remove the surviving mode
          lineinfile:
            dest: /etc/etcd/etcd.conf
            regexp: '^ETCD_FORCE_NEW_CLUSTER='
            state: absent
          register: change_file_result

    - name: Reload systemd daemons
      systemd:
        name: 'etcd'
        state: started
        daemon_reload: True
      become: True
      # `is changed` is the Jinja2 test syntax. The previous `| changed`
      # filter form was deprecated in Ansible 2.5 and removed in 2.9.
      when: change_file_result is changed
[root@stretch-master-0 ~]# oc get node
NAME            STATUS    ROLES     AGE       VERSION
stretch-app-0   Ready     compute   3d        v1.9.1+a0ce1bc657
stretch-app-1   Ready     compute   3d        v1.9.1+a0ce1bc657
[root@stretch-master-0 ~]# etcdctl -C https://stretch-master-2:2379 --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt cluster-health
member 1f43aaf2ecf94cab is healthy: got healthy result from https://10.19.114.17:2379
cluster is healthy
Recovery cluster playbook:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment