diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index 0dc5b8211e0d..777bd4bf5cd5 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -457,3 +457,6 @@ iscsi.session.cleanup.enabled=false # Instance conversion VIRT_V2V_TMPDIR env var #convert.instance.env.virtv2v.tmpdir= + +# Time, in seconds, to wait before retrying to rebase during the incremental snapshot process. +# incremental.snapshot.retry.rebase.wait=60 diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index 1561f0d5cfbc..9d3f9f7df55f 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -885,6 +885,11 @@ public Property getWorkers() { */ public static final Property CREATE_FULL_CLONE = new Property<>("create.full.clone", false); + /** + * Time, in seconds, to wait before retrying to rebase during the incremental snapshot process. + * */ + public static final Property INCREMENTAL_SNAPSHOT_RETRY_REBASE_WAIT = new Property<>("incremental.snapshot.retry.rebase.wait", 60); + public static class Property { private String name; diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java index 030d9747d6cd..6e03b84d20cf 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java @@ -185,6 +185,8 @@ public class KVMStorageProcessor implements StorageProcessor { private int incrementalSnapshotTimeout; + private int incrementalSnapshotRetryRebaseWait; + private static final String CHECKPOINT_XML_TEMP_DIR = "/tmp/cloudstack/checkpointXMLs"; private static final String BACKUP_XML_TEMP_DIR = "/tmp/cloudstack/backupXMLs"; @@ -252,6 +254,7 @@ public boolean configure(final String name, final Map params) th _cmdsTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.CMDS_TIMEOUT) * 1000; incrementalSnapshotTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.INCREMENTAL_SNAPSHOT_TIMEOUT) * 1000; + incrementalSnapshotRetryRebaseWait = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.INCREMENTAL_SNAPSHOT_RETRY_REBASE_WAIT) * 1000; return true; } @@ -2093,8 +2096,25 @@ protected void rebaseSnapshot(SnapshotObjectTO snapshotObjectTO, KVMStoragePool QemuImg qemuImg = new QemuImg(wait); qemuImg.rebase(snapshotFile, parentSnapshotFile, PhysicalDiskFormat.QCOW2.toString(), false); } catch (LibvirtException | QemuImgException e) { - logger.error("Exception while rebasing incremental snapshot [{}] due to: [{}].", snapshotName, e.getMessage(), e); - throw new CloudRuntimeException(e); + if (!StringUtils.contains(e.getMessage(), "Is another process using the image")) { + logger.error("Exception while rebasing incremental snapshot [{}] due to: [{}].", snapshotName, e.getMessage(), e); + throw new CloudRuntimeException(e); + } + retryRebase(snapshotName, wait, e, snapshotFile, parentSnapshotFile); + } + } + + private void retryRebase(String snapshotName, int wait, Exception e, QemuImgFile snapshotFile, QemuImgFile parentSnapshotFile) { + logger.warn("Libvirt still has not released the lock, will wait [{}] milliseconds and try again later.", incrementalSnapshotRetryRebaseWait); + try { + Thread.sleep(incrementalSnapshotRetryRebaseWait); + QemuImg qemuImg = new QemuImg(wait); + qemuImg.rebase(snapshotFile, parentSnapshotFile, PhysicalDiskFormat.QCOW2.toString(), false); + } catch (LibvirtException | QemuImgException | InterruptedException ex) { + logger.error("Unable to rebase snapshot [{}].", snapshotName, ex); + CloudRuntimeException cre = new CloudRuntimeException(ex); + cre.addSuppressed(e); + throw cre; } }