虚拟机热迁移
开启虚拟机热迁移
在 v0.56 版本之前,虚拟机热迁移需要手动开启 LiveMigration 特性功能(feature gate);v0.56 及之后的版本已默认开启。
虚拟机热迁移要求:
- PVC 必须支持 ReadWriteMany (RWX),且必须是共享存储(可以用 NFS 创建个 PVC)
- 虚拟机网络需要使用 masquerade 模式
- virt-launcher Pod 的 49152-49153 端口未被占用
- virt-launcher Pod 的主网卡名称必须一致,在 Multus 里也要确保网卡为相同名称
[root@base-k8s-master-1 ~]# kubectl patch kubevirts.kubevirt.io \
-n kubevirt kubevirt --type json \
-p '[{"op": "add", "path": "/spec/workloadUpdateStrategy/workloadUpdateMethods", "value": ["LiveMigrate"]}]'
kubevirt.kubevirt.io/kubevirt patched
[root@base-k8s-master-1 kubevirt]# kubectl get kubevirts.kubevirt.io \
-n kubevirt kubevirt -o jsonpath="{.spec}" | jq
{
"certificateRotateStrategy": {},
"configuration": {},
"customizeComponents": {},
"imagePullPolicy": "IfNotPresent",
"workloadUpdateStrategy": {
"workloadUpdateMethods": [
"LiveMigrate"
]
}
}
开启功能后不需要重启虚拟机,即可生效。
测试虚拟机热迁移
测试前先说一下虚拟机配置:
- 只有一个网卡,网卡设置为 masquerade 模式(网卡这个在写网络的时候再详细写)。
- 有一个硬盘,硬盘对应的 PVC 为 RWX 模式,来源为 Ceph RBD,是共享存储。
PVC 好理解,网卡这个看 TroubleShooting 章节。
# 检查当前状态
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.171.26 base-k8s-worker-2.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 2/2 Running 0 23h
# 开始迁移
[root@base-k8s-master-1 kubevirt]# virtctl migrate my-vm
VM my-vm was scheduled to migrate
# 查看过程
# 查看 vm,vmi,pod
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.171.26 base-k8s-worker-2.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 2/2 Running 0 23h
pod/virt-launcher-my-vm-zjrkb 0/2 ContainerCreating 0 2s
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.171.26 base-k8s-worker-2.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 2/2 Running 0 23h
pod/virt-launcher-my-vm-zjrkb 2/2 Running 0 8s
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Migrating True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.171.26 base-k8s-worker-2.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 2/2 Running 0 23h
pod/virt-launcher-my-vm-zjrkb 2/2 Running 0 9s
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running False
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.223.12 base-k8s-worker-1.example.com False
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 2/2 Running 0 23h
pod/virt-launcher-my-vm-zjrkb 2/2 Running 0 12s
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.223.12 base-k8s-worker-1.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 1/2 NotReady 0 23h
pod/virt-launcher-my-vm-zjrkb 2/2 Running 0 12s
[root@base-k8s-master-1 ~]# kubectl get vm,vmi,pod
NAME AGE STATUS READY
virtualmachine.kubevirt.io/my-vm 3d22h Running True
NAME AGE PHASE IP NODENAME READY
virtualmachineinstance.kubevirt.io/my-vm 23h Running 10.100.223.12 base-k8s-worker-1.example.com True
NAME READY STATUS RESTARTS AGE
pod/virt-launcher-my-vm-j4ggh 0/2 Completed 0 23h
pod/virt-launcher-my-vm-zjrkb 2/2 Running 0 14s
# 每次用 virtctl 迁移都会自动创建一个 VirtualMachineInstanceMigration 资源
# 查看 VirtualMachineInstanceMigration 的变化
[root@base-k8s-master-1 kubevirt]# kubectl get virtualmachineinstancemigrations.kubevirt.io
NAME PHASE VMI
kubevirt-migrate-vm-8jzrj Scheduling my-vm
[root@base-k8s-master-1 kubevirt]# kubectl get virtualmachineinstancemigrations.kubevirt.io
NAME PHASE VMI
kubevirt-migrate-vm-8jzrj Running my-vm
[root@base-k8s-master-1 kubevirt]# kubectl get virtualmachineinstancemigrations.kubevirt.io
NAME PHASE VMI
kubevirt-migrate-vm-8jzrj Succeeded my-vm
补充两点:
- 可以看到迁移成功后多了一个 Completed 的 Pod,不需要手动清理,可以不管;非要清理的话,找机会用 virtctl 重启虚拟机,多余的 Pod 就会自动清理。
- 除了用 virtctl migrate my-vm 迁移虚拟机,还可以通过创建 VirtualMachineInstanceMigration 资源来触发迁移。
[root@base-k8s-master-1 kubevirt]# cat migratiom.yml
apiVersion: kubevirt.io/v1
kind: VirtualMachineInstanceMigration
metadata:
  name: migration-job-my-vm
spec:
  vmiName: my-vm
[root@base-k8s-master-1 kubevirt]# kubectl apply -f migratiom.yml
virtualmachineinstancemigration.kubevirt.io/migration-job-my-vm created
[root@base-k8s-master-1 kubevirt]# kubectl get virtualmachineinstancemigrations.kubevirt.io
NAME PHASE VMI
kubevirt-migrate-vm-8jzrj Succeeded my-vm
migration-job-my-vm Succeeded my-vm
[root@base-k8s-master-1 ~]# kubectl get vmi
NAME AGE PHASE IP NODENAME READY
my-vm 24h Running 10.100.171.16 base-k8s-worker-2.example.com True
可以看到刚刚在 Worker-1 的虚拟机又回到了 Worker-2。
查看 VM 和 VMI 的迁移记录
[root@base-k8s-master-1 kubevirt]# kubectl get vmi my-vm -o yaml
status:
...output omitted...
migrationMethod: LiveMigration
migrationState:
completed: true
endTimestamp: "2025-03-06T14:41:24Z"
migrationConfiguration:
allowAutoConverge: false
allowPostCopy: false
bandwidthPerMigration: "0"
completionTimeoutPerGiB: 150
nodeDrainTaintKey: kubevirt.io/drain
parallelMigrationsPerCluster: 5
parallelOutboundMigrationsPerNode: 2
progressTimeout: 150
unsafeMigrationOverride: false
migrationUid: c5b21506-2583-4b03-9596-d66211bc8bdc
mode: PreCopy
sourceNode: base-k8s-worker-2.example.com
sourcePod: virt-launcher-my-vm-ckxgp
startTimestamp: "2025-03-06T14:41:20Z"
targetDirectMigrationNodePorts:
"39065": 49152
"39073": 0
targetNode: base-k8s-worker-1.example.com
targetNodeAddress: 10.100.223.21
targetNodeDomainDetected: true
targetNodeDomainReadyTimestamp: "2025-03-06T14:41:23Z"
targetPod: virt-launcher-my-vm-6mj5h
migrationTransport: Unix
nodeName: base-k8s-worker-1.example.com
phase: Running
phaseTransitionTimestamps:
- phase: Pending
phaseTransitionTimestamp: "2025-03-05T14:27:25Z"
- phase: Scheduling
phaseTransitionTimestamp: "2025-03-05T14:27:25Z"
- phase: Scheduled
phaseTransitionTimestamp: "2025-03-05T14:27:27Z"
- phase: Running
phaseTransitionTimestamp: "2025-03-05T14:27:30Z"
...output omitted...
这里我做过其他迁移测试,所以里边的信息和上边测试的记录对不上了,但是也是一个迁移的记录。
停止迁移
# 通过 virtctl 停止
[root@base-k8s-master-1 kubevirt]# virtctl migrate-cancel my-vm
VM my-vm was scheduled to migrate-cancel
# 通过删除 virtualmachineinstancemigrations.kubevirt.io 资源停止
[root@base-k8s-master-1 kubevirt]# kubectl delete virtualmachineinstancemigrations.kubevirt.io kubevirt-migrate-vm-bs7cl
virtualmachineinstancemigration.kubevirt.io "kubevirt-migrate-vm-bs7cl" deleted
可以看到如下状态:
[root@base-k8s-master-1 kubevirt]# kubectl get vmi my-vm -o yaml
status:
...output omitted...
migrationState:
abortRequested: true
abortStatus: Succeeded
completed: true
endTimestamp: "2025-03-06T15:02:26Z"
failed: true
failureReason: 'Live migration aborted '
...output omitted...
TroubleShooting
迁移报错网卡问题
迁移时报错:
[root@base-k8s-master-1 ~]# virtctl migrate my-vm
Error migrating VirtualMachine Internal error occurred: admission webhook "migration-create-validator.kubevirt.io" denied the request: Cannot migrate VMI, Reason: InterfaceNotLiveMigratable, Message: cannot migrate VMI which does not use masquerade, bridge with kubevirt.io/allow-pod-bridge-network-live-migration VM annotation or a migratable plugin to connect to the pod network
原因在于我创建的虚拟机网卡为网桥模式,需要手动指定网卡为 masquerade 模式。
查看虚拟机状态:
[root@base-k8s-master-1 kubevirt]# kubectl get vm my-vm -o yaml
...output omitted...
status:
conditions:
...output omitted...
- lastProbeTime: null
lastTransitionTime: null
message: cannot migrate VMI which does not use masquerade, bridge with kubevirt.io/allow-pod-bridge-network-live-migration
VM annotation or a migratable plugin to connect to the pod network
reason: InterfaceNotLiveMigratable
status: "False"
type: LiveMigratable
- lastProbeTime: null
lastTransitionTime: null
message: 'InterfaceNotLiveMigratable: cannot migrate VMI which does not use
masquerade, bridge with kubevirt.io/allow-pod-bridge-network-live-migration
VM annotation or a migratable plugin to connect to the pod network'
reason: NotMigratable
status: "False"
type: StorageLiveMigratable
...output omitted...
查看当前虚拟机配置:
[root@base-k8s-master-1 kubevirt]# kubectl get vm my-vm -o yaml
...output omitted...
  template:
    metadata:
      creationTimestamp: null
    spec:
      architecture: amd64
      domain:
        cpu:
          cores: 1
        devices: {}
        machine:
          type: q35
        memory:
          guest: 513Mi
        resources: {}
      terminationGracePeriodSeconds: 180
...output omitted...
添加网卡配置如下:
[root@base-k8s-master-1 kubevirt]# kubectl get vm my-vm -o yaml
...output omitted...
  template:
    metadata:
      creationTimestamp: null
    spec:
      architecture: amd64
      domain:
        cpu:
          cores: 1
        devices:
          interfaces:          # 添加配置
          - masquerade: {}     # 添加配置
            name: default      # 添加配置
        machine:
          type: q35
        memory:
          guest: 513Mi
        resources: {}
      networks:                # 添加配置
      - name: default          # 添加配置
        pod: {}                # 添加配置
      terminationGracePeriodSeconds: 180
...output omitted...
重启虚拟机:
[root@base-k8s-master-1 ~]# virtctl restart my-vm
VM my-vm was scheduled to restart
# 查看虚拟机状态
[root@base-k8s-master-1 kubevirt]# kubectl get vm my-vm -o yaml
...output omitted...
status:
conditions:
...output omitted...
- lastProbeTime: null
lastTransitionTime: null
status: "True"
type: LiveMigratable
- lastProbeTime: null
lastTransitionTime: null
status: "True"
type: StorageLiveMigratable
...output omitted...