Skip to content

Backup/Restore ETCD for Kubernetes

homepage-banner

Install etcd client

sudo apt install etcd-client

Confirm configuration

Inspect the etcd configuration to determine the locations of the database and the certificates.

sudo cat /etc/kubernetes/manifests/etcd.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://192.168.8.200:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
  - command:
    - etcd
    - --advertise-client-urls=https://192.168.8.200:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    - --data-dir=/var/lib/etcd
    - --experimental-initial-corrupt-check=true
    - --experimental-watch-progress-notify-interval=5s
    - --initial-advertise-peer-urls=https://192.168.8.200:2380
    - --initial-cluster=kubernetes-1=https://192.168.8.200:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://192.168.8.200:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://192.168.8.200:2380
    - --name=kubernetes-1
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    image: registry.k8s.io/etcd:3.5.7-0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /health?exclude=NOSPACE&serializable=true
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: etcd
    resources:
      requests:
        cpu: 100m
        memory: 100Mi
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /health?serializable=false
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /var/lib/etcd
      name: etcd-data
    - mountPath: /etc/kubernetes/pki/etcd
      name: etcd-certs
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/kubernetes/pki/etcd
      type: DirectoryOrCreate
    name: etcd-certs
  - hostPath:
      path: /var/lib/etcd
      type: DirectoryOrCreate
    name: etcd-data
status: {}

List etcd certificates

ls /etc/kubernetes/pki/etcd/
ca.crt  ca.key  healthcheck-client.crt  healthcheck-client.key  peer.crt  peer.key  server.crt  server.key

List database files

sudo tree /var/lib/etcd/
/var/lib/etcd/
└── member
    ├── snap
    │   ├── 0000000000000003-0000000000099d32.snap
    │   ├── 0000000000000003-000000000009c443.snap
    │   ├── 0000000000000003-000000000009eb54.snap
    │   ├── 0000000000000003-00000000000a1265.snap
    │   ├── 0000000000000003-00000000000a3976.snap
    │   └── db
    └── wal
        ├── 0000000000000005-0000000000058f3c.wal
        ├── 0000000000000006-0000000000062557.wal
        ├── 0000000000000007-000000000006f9da.wal
        ├── 0000000000000008-000000000007e250.wal
        ├── 0000000000000009-0000000000092ab2.wal
        └── 1.tmp

3 directories, 12 files

Check connection to etcd server

sudo ETCDCTL_API=3 etcdctl --write-out table --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key member list
+-----------------+---------+--------------+----------------------------+----------------------------+
|       ID        | STATUS  |     NAME     |         PEER ADDRS         |        CLIENT ADDRS        |
+-----------------+---------+--------------+----------------------------+----------------------------+
| 14a3fa8f30c99a0 | started | kubernetes-1 | https://192.168.8.200:2380 | https://192.168.8.200:2379 |
+-----------------+---------+--------------+----------------------------+----------------------------+
sudo ETCDCTL_API=3 etcdctl --write-out table --endpoints 127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key endpoint status
+----------------+-----------------+---------+---------+-----------+-----------+------------+
|    ENDPOINT    |       ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+----------------+-----------------+---------+---------+-----------+-----------+------------+
| 127.0.0.1:2379 | 14a3fa8f30c99a0 |   3.5.7 |  153 MB |      true |         3 |     632537 |
+----------------+-----------------+---------+---------+-----------+-----------+------------+

Create snapshot

sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot save etcd_snap.db
2023-08-03 16:40:35.188677 I | clientv3: opened snapshot stream; downloading
2023-08-03 16:40:37.749969 I | clientv3: completed snapshot read; closing
Snapshot saved at etcd_snap.db

Inspect created snapshot

sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot status etcd_snap.db
2cd9c811, 569734, 1394, 153 MB
sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot --write-out=json status etcd_snap.db
{"hash":752470033,"revision":569734,"totalKey":1394,"totalSize":153206784}
sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot --write-out=table status etcd_snap.db
+----------+----------+------------+------------+
|   HASH   | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| 2cd9c811 |   569734 |       1394 |     153 MB |
+----------+----------+------------+------------+

Restore snapshot

sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot restore etcd_snap.db --data-dir snapshot
2023-08-03 16:41:33.734528 I | mvcc: restore compact to 568828
2023-08-03 16:41:33.753877 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32

Alternatively, you can restore the snapshot directly into the etcd data directory while etcd is stopped, or restore it into a new directory and switch etcd over to that one.

sudo ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot restore etcd_snap.db --data-dir snapshot
2023-08-03 10:43:52.425419 I | mvcc: restore compact to 568828
2023-08-03 10:43:52.694561 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32

Partial restore is not possible using this method.

Reference

  • https://sleeplessbeastie.eu/2023/12/15/how-to-backup-and-restore-etcd-database/
Leave a message