blob: 682bfb9936ee9576a4423ca00d46c4afbe0de170 [file] [log] [blame]
// Licensed to the Apacohe Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.vm;
import java.net.URI;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TimeZone;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao;
import org.apache.cloudstack.ca.CAManager;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.StoragePoolAllocator;
import org.apache.cloudstack.framework.ca.Certificate;
import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.jobs.AsyncJob;
import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.framework.jobs.Outcome;
import org.apache.cloudstack.framework.jobs.dao.VmWorkJobDao;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
import org.apache.cloudstack.framework.jobs.impl.JobSerializerHelper;
import org.apache.cloudstack.framework.jobs.impl.OutcomeImpl;
import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO;
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.framework.messagebus.MessageDispatcher;
import org.apache.cloudstack.framework.messagebus.MessageHandler;
import org.apache.cloudstack.jobs.JobInfo;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.storage.to.VolumeObjectTO;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
import com.cloud.agent.api.AgentControlAnswer;
import com.cloud.agent.api.AgentControlCommand;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.AttachOrDettachConfigDriveCommand;
import com.cloud.agent.api.CheckVirtualMachineAnswer;
import com.cloud.agent.api.CheckVirtualMachineCommand;
import com.cloud.agent.api.ClusterVMMetaDataSyncAnswer;
import com.cloud.agent.api.ClusterVMMetaDataSyncCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.MigrateCommand;
import com.cloud.agent.api.ModifyTargetsCommand;
import com.cloud.agent.api.PingRoutingCommand;
import com.cloud.agent.api.PlugNicAnswer;
import com.cloud.agent.api.PlugNicCommand;
import com.cloud.agent.api.PrepareForMigrationCommand;
import com.cloud.agent.api.RebootAnswer;
import com.cloud.agent.api.RebootCommand;
import com.cloud.agent.api.ReplugNicAnswer;
import com.cloud.agent.api.ReplugNicCommand;
import com.cloud.agent.api.RestoreVMSnapshotAnswer;
import com.cloud.agent.api.RestoreVMSnapshotCommand;
import com.cloud.agent.api.ScaleVmCommand;
import com.cloud.agent.api.StartAnswer;
import com.cloud.agent.api.StartCommand;
import com.cloud.agent.api.StartupCommand;
import com.cloud.agent.api.StartupRoutingCommand;
import com.cloud.agent.api.StopAnswer;
import com.cloud.agent.api.StopCommand;
import com.cloud.agent.api.UnPlugNicAnswer;
import com.cloud.agent.api.UnPlugNicCommand;
import com.cloud.agent.api.UnregisterVMCommand;
import com.cloud.agent.api.routing.NetworkElementCommand;
import com.cloud.agent.api.to.DiskTO;
import com.cloud.agent.api.to.GPUDeviceTO;
import com.cloud.agent.api.to.NicTO;
import com.cloud.agent.api.to.VirtualMachineTO;
import com.cloud.agent.manager.Commands;
import com.cloud.agent.manager.allocator.HostAllocator;
import com.cloud.alert.AlertManager;
import com.cloud.capacity.CapacityManager;
import com.cloud.configuration.Config;
import com.cloud.dc.ClusterDetailsDao;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.HostPodVO;
import com.cloud.dc.Pod;
import com.cloud.dc.dao.ClusterDao;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.dc.dao.HostPodDao;
import com.cloud.deploy.DataCenterDeployment;
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlan;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.deploy.DeploymentPlanner.ExcludeList;
import com.cloud.deploy.DeploymentPlanningManager;
import com.cloud.domain.dao.DomainDao;
import com.cloud.event.EventTypes;
import com.cloud.event.UsageEventUtils;
import com.cloud.exception.AffinityConflictException;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.ConnectionException;
import com.cloud.exception.InsufficientAddressCapacityException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.InsufficientServerCapacityException;
import com.cloud.exception.InsufficientVirtualNetworkCapacityException;
import com.cloud.exception.InvalidParameterValueException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.gpu.dao.VGPUTypesDao;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.ha.HighAvailabilityManager.WorkType;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.hypervisor.HypervisorGuru;
import com.cloud.hypervisor.HypervisorGuruManager;
import com.cloud.network.Network;
import com.cloud.network.NetworkModel;
import com.cloud.network.dao.NetworkDao;
import com.cloud.network.dao.NetworkVO;
import com.cloud.network.router.VirtualRouter;
import com.cloud.network.rules.RulesManager;
import com.cloud.offering.DiskOffering;
import com.cloud.offering.DiskOfferingInfo;
import com.cloud.offering.ServiceOffering;
import com.cloud.org.Cluster;
import com.cloud.resource.ResourceManager;
import com.cloud.resource.ResourceState;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.service.dao.ServiceOfferingDao;
import com.cloud.storage.DiskOfferingVO;
import com.cloud.storage.ScopeType;
import com.cloud.storage.Storage.ImageFormat;
import com.cloud.storage.StoragePool;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.Volume;
import com.cloud.storage.Volume.Type;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.DiskOfferingDao;
import com.cloud.storage.dao.GuestOSCategoryDao;
import com.cloud.storage.dao.GuestOSDao;
import com.cloud.storage.dao.StoragePoolHostDao;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.template.VirtualMachineTemplate;
import com.cloud.user.Account;
import com.cloud.user.User;
import com.cloud.utils.DateUtil;
import com.cloud.utils.Journal;
import com.cloud.utils.Pair;
import com.cloud.utils.Predicate;
import com.cloud.utils.ReflectionUse;
import com.cloud.utils.StringUtils;
import com.cloud.utils.Ternary;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.EntityManager;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallbackWithException;
import com.cloud.utils.db.TransactionCallbackWithExceptionNoReturn;
import com.cloud.utils.db.TransactionLegacy;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.exception.ExecutionException;
import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.vm.ItWorkVO.Step;
import com.cloud.vm.VirtualMachine.Event;
import com.cloud.vm.VirtualMachine.PowerState;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.dao.NicDao;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.UserVmDetailsDao;
import com.cloud.vm.dao.VMInstanceDao;
import com.cloud.vm.snapshot.VMSnapshotManager;
import com.cloud.vm.snapshot.VMSnapshotVO;
import com.cloud.vm.snapshot.dao.VMSnapshotDao;
import com.google.common.base.Strings;
public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMachineManager, VmWorkJobHandler, Listener, Configurable {
private static final Logger s_logger = Logger.getLogger(VirtualMachineManagerImpl.class);
public static final String VM_WORK_JOB_HANDLER = VirtualMachineManagerImpl.class.getSimpleName();
private static final String VM_SYNC_ALERT_SUBJECT = "VM state sync alert";
@Inject
DataStoreManager dataStoreMgr;
@Inject
protected NetworkOrchestrationService _networkMgr;
@Inject
protected NetworkModel _networkModel;
@Inject
protected AgentManager _agentMgr;
@Inject
protected VMInstanceDao _vmDao;
@Inject
protected ServiceOfferingDao _offeringDao;
@Inject
protected DiskOfferingDao _diskOfferingDao;
@Inject
protected VMTemplateDao _templateDao;
@Inject
protected DomainDao _domainDao;
@Inject
protected ItWorkDao _workDao;
@Inject
protected UserVmDao _userVmDao;
@Inject
protected UserVmService _userVmService;
@Inject
protected CapacityManager _capacityMgr;
@Inject
protected NicDao _nicsDao;
@Inject
protected HostDao _hostDao;
@Inject
protected AlertManager _alertMgr;
@Inject
protected GuestOSCategoryDao _guestOsCategoryDao;
@Inject
protected GuestOSDao _guestOsDao;
@Inject
protected VolumeDao _volsDao;
@Inject
protected HighAvailabilityManager _haMgr;
@Inject
protected HostPodDao _podDao;
@Inject
protected DataCenterDao _dcDao;
@Inject
protected ClusterDao _clusterDao;
@Inject
protected PrimaryDataStoreDao _storagePoolDao;
@Inject
protected HypervisorGuruManager _hvGuruMgr;
@Inject
protected NetworkDao _networkDao;
@Inject
protected StoragePoolHostDao _poolHostDao;
@Inject
protected VMSnapshotDao _vmSnapshotDao;
@Inject
protected RulesManager rulesMgr;
@Inject
protected AffinityGroupVMMapDao _affinityGroupVMMapDao;
@Inject
protected VGPUTypesDao _vgpuTypesDao;
@Inject
protected EntityManager _entityMgr;
@Inject
protected GuestOSCategoryDao _guestOSCategoryDao;
@Inject
protected GuestOSDao _guestOSDao = null;
@Inject
protected UserVmDetailsDao _vmDetailsDao;
@Inject
protected ServiceOfferingDao _serviceOfferingDao = null;
@Inject
protected CAManager caManager;
@Inject
ConfigDepot _configDepot;
protected List<HostAllocator> hostAllocators;
public List<HostAllocator> getHostAllocators() {
return hostAllocators;
}
public void setHostAllocators(final List<HostAllocator> hostAllocators) {
this.hostAllocators = hostAllocators;
}
protected List<StoragePoolAllocator> _storagePoolAllocators;
@Inject
protected ResourceManager _resourceMgr;
@Inject
protected VMSnapshotManager _vmSnapshotMgr = null;
@Inject
protected ClusterDetailsDao _clusterDetailsDao;
@Inject
protected UserVmDetailsDao _uservmDetailsDao;
@Inject
protected ConfigurationDao _configDao;
@Inject
VolumeOrchestrationService volumeMgr;
@Inject
DeploymentPlanningManager _dpMgr;
@Inject
protected MessageBus _messageBus;
@Inject
protected VirtualMachinePowerStateSync _syncMgr;
@Inject
protected VmWorkJobDao _workJobDao;
@Inject
protected AsyncJobManager _jobMgr;
VmWorkJobHandlerProxy _jobHandlerProxy = new VmWorkJobHandlerProxy(this);
Map<VirtualMachine.Type, VirtualMachineGuru> _vmGurus = new HashMap<VirtualMachine.Type, VirtualMachineGuru>();
protected StateMachine2<State, VirtualMachine.Event, VirtualMachine> _stateMachine;
static final ConfigKey<Integer> StartRetry = new ConfigKey<Integer>("Advanced", Integer.class, "start.retry", "10",
"Number of times to retry create and start commands", true);
static final ConfigKey<Integer> VmOpWaitInterval = new ConfigKey<Integer>("Advanced", Integer.class, "vm.op.wait.interval", "120",
"Time (in seconds) to wait before checking if a previous operation has succeeded", true);
static final ConfigKey<Integer> VmOpLockStateRetry = new ConfigKey<Integer>("Advanced", Integer.class, "vm.op.lock.state.retry", "5",
"Times to retry locking the state of a VM for operations, -1 means forever", true);
static final ConfigKey<Long> VmOpCleanupInterval = new ConfigKey<Long>("Advanced", Long.class, "vm.op.cleanup.interval", "86400",
"Interval to run the thread that cleans up the vm operations (in seconds)", false);
static final ConfigKey<Long> VmOpCleanupWait = new ConfigKey<Long>("Advanced", Long.class, "vm.op.cleanup.wait", "3600",
"Time (in seconds) to wait before cleanuping up any vm work items", true);
static final ConfigKey<Long> VmOpCancelInterval = new ConfigKey<Long>("Advanced", Long.class, "vm.op.cancel.interval", "3600",
"Time (in seconds) to wait before cancelling a operation", false);
static final ConfigKey<Boolean> VmDestroyForcestop = new ConfigKey<Boolean>("Advanced", Boolean.class, "vm.destroy.forcestop", "false",
"On destroy, force-stop takes this value ", true);
static final ConfigKey<Integer> ClusterDeltaSyncInterval = new ConfigKey<Integer>("Advanced", Integer.class, "sync.interval", "60",
"Cluster Delta sync interval in seconds",
false);
static final ConfigKey<Integer> ClusterVMMetaDataSyncInterval = new ConfigKey<Integer>("Advanced", Integer.class, "vmmetadata.sync.interval", "180", "Cluster VM metadata sync interval in seconds",
false);
static final ConfigKey<Long> VmJobCheckInterval = new ConfigKey<Long>("Advanced",
Long.class, "vm.job.check.interval", "3000",
"Interval in milliseconds to check if the job is complete", false);
static final ConfigKey<Long> VmJobTimeout = new ConfigKey<Long>("Advanced",
Long.class, "vm.job.timeout", "600000",
"Time in milliseconds to wait before attempting to cancel a job", false);
static final ConfigKey<Integer> VmJobStateReportInterval = new ConfigKey<Integer>("Advanced",
Integer.class, "vm.job.report.interval", "60",
"Interval to send application level pings to make sure the connection is still working", false);
static final ConfigKey<Boolean> HaVmRestartHostUp = new ConfigKey<Boolean>("Advanced", Boolean.class, "ha.vm.restart.hostup", "true",
"If an out-of-band stop of a VM is detected and its host is up, then power on the VM", true);
ScheduledExecutorService _executor = null;
protected long _nodeId;
@Override
public void registerGuru(final VirtualMachine.Type type, final VirtualMachineGuru guru) {
synchronized (_vmGurus) {
_vmGurus.put(type, guru);
}
}
@Override
@DB
public void allocate(final String vmInstanceName, final VirtualMachineTemplate template, final ServiceOffering serviceOffering,
final DiskOfferingInfo rootDiskOfferingInfo, final List<DiskOfferingInfo> dataDiskOfferings,
final LinkedHashMap<? extends Network, List<? extends NicProfile>> auxiliaryNetworks, final DeploymentPlan plan, final HypervisorType hyperType, final Map<String, Map<Integer, String>> extraDhcpOptions, final Map<Long, DiskOffering> datadiskTemplateToDiskOfferingMap)
throws InsufficientCapacityException {
final VMInstanceVO vm = _vmDao.findVMByInstanceName(vmInstanceName);
final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
if (s_logger.isDebugEnabled()) {
s_logger.debug("Allocating entries for VM: " + vm);
}
vm.setDataCenterId(plan.getDataCenterId());
if (plan.getPodId() != null) {
vm.setPodIdToDeployIn(plan.getPodId());
}
assert plan.getClusterId() == null && plan.getPoolId() == null : "We currently don't support cluster and pool preset yet";
final VMInstanceVO vmFinal = _vmDao.persist(vm);
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vmFinal, template, serviceOffering, null, null);
Transaction.execute(new TransactionCallbackWithExceptionNoReturn<InsufficientCapacityException>() {
@Override
public void doInTransactionWithoutResult(final TransactionStatus status) throws InsufficientCapacityException {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Allocating nics for " + vmFinal);
}
try {
if (!vmProfile.getBootArgs().contains("ExternalLoadBalancerVm"))
_networkMgr.allocate(vmProfile, auxiliaryNetworks, extraDhcpOptions);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Concurrent operation while trying to allocate resources for the VM", e);
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Allocating disks for " + vmFinal);
}
if (template.getFormat() == ImageFormat.ISO) {
volumeMgr.allocateRawVolume(Type.ROOT, "ROOT-" + vmFinal.getId(), rootDiskOfferingInfo.getDiskOffering(), rootDiskOfferingInfo.getSize(),
rootDiskOfferingInfo.getMinIops(), rootDiskOfferingInfo.getMaxIops(), vmFinal, template, owner, null);
} else if (template.getFormat() == ImageFormat.BAREMETAL) {
// Do nothing
} else {
volumeMgr.allocateTemplatedVolume(Type.ROOT, "ROOT-" + vmFinal.getId(), rootDiskOfferingInfo.getDiskOffering(), rootDiskOfferingInfo.getSize(),
rootDiskOfferingInfo.getMinIops(), rootDiskOfferingInfo.getMaxIops(), template, vmFinal, owner);
}
if (dataDiskOfferings != null) {
for (final DiskOfferingInfo dataDiskOfferingInfo : dataDiskOfferings) {
volumeMgr.allocateRawVolume(Type.DATADISK, "DATA-" + vmFinal.getId(), dataDiskOfferingInfo.getDiskOffering(), dataDiskOfferingInfo.getSize(),
dataDiskOfferingInfo.getMinIops(), dataDiskOfferingInfo.getMaxIops(), vmFinal, template, owner, null);
}
}
if (datadiskTemplateToDiskOfferingMap != null && !datadiskTemplateToDiskOfferingMap.isEmpty()) {
int diskNumber = 1;
for (Entry<Long, DiskOffering> dataDiskTemplateToDiskOfferingMap : datadiskTemplateToDiskOfferingMap.entrySet()) {
DiskOffering diskOffering = dataDiskTemplateToDiskOfferingMap.getValue();
long diskOfferingSize = diskOffering.getDiskSize() / (1024 * 1024 * 1024);
VMTemplateVO dataDiskTemplate = _templateDao.findById(dataDiskTemplateToDiskOfferingMap.getKey());
volumeMgr.allocateRawVolume(Type.DATADISK, "DATA-" + vmFinal.getId() + "-" + String.valueOf(diskNumber), diskOffering, diskOfferingSize, null, null,
vmFinal, dataDiskTemplate, owner, Long.valueOf(diskNumber));
diskNumber++;
}
}
}
});
if (s_logger.isDebugEnabled()) {
s_logger.debug("Allocation completed for VM: " + vmFinal);
}
}
@Override
public void allocate(final String vmInstanceName, final VirtualMachineTemplate template, final ServiceOffering serviceOffering,
final LinkedHashMap<? extends Network, List<? extends NicProfile>> networks, final DeploymentPlan plan, final HypervisorType hyperType) throws InsufficientCapacityException {
allocate(vmInstanceName, template, serviceOffering, new DiskOfferingInfo(serviceOffering), new ArrayList<DiskOfferingInfo>(), networks, plan, hyperType, null, null);
}
private VirtualMachineGuru getVmGuru(final VirtualMachine vm) {
if(vm != null) {
return _vmGurus.get(vm.getType());
}
return null;
}
@Override
public void expunge(final String vmUuid) throws ResourceUnavailableException {
try {
advanceExpunge(vmUuid);
} catch (final OperationTimedoutException e) {
throw new CloudRuntimeException("Operation timed out", e);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Concurrent operation ", e);
}
}
@Override
public void advanceExpunge(final String vmUuid) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
advanceExpunge(vm);
}
protected void advanceExpunge(VMInstanceVO vm) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException {
if (vm == null || vm.getRemoved() != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find vm or vm is destroyed: " + vm);
}
return;
}
advanceStop(vm.getUuid(), false);
vm = _vmDao.findByUuid(vm.getUuid());
try {
if (!stateTransitTo(vm, VirtualMachine.Event.ExpungeOperation, vm.getHostId())) {
s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm);
throw new CloudRuntimeException("Unable to destroy " + vm);
}
} catch (final NoTransitionException e) {
s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm);
throw new CloudRuntimeException("Unable to destroy " + vm, e);
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Destroying vm " + vm);
}
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
s_logger.debug("Cleaning up NICS");
final List<Command> nicExpungeCommands = hvGuru.finalizeExpungeNics(vm, profile.getNics());
_networkMgr.cleanupNics(profile);
s_logger.debug("Cleaning up hypervisor data structures (ex. SRs in XenServer) for managed storage");
final List<Command> volumeExpungeCommands = hvGuru.finalizeExpungeVolumes(vm);
final Long hostId = vm.getHostId() != null ? vm.getHostId() : vm.getLastHostId();
List<Map<String, String>> targets = getTargets(hostId, vm.getId());
if (volumeExpungeCommands != null && volumeExpungeCommands.size() > 0 && hostId != null) {
final Commands cmds = new Commands(Command.OnError.Stop);
for (final Command volumeExpungeCommand : volumeExpungeCommands) {
cmds.addCommand(volumeExpungeCommand);
}
_agentMgr.send(hostId, cmds);
if (!cmds.isSuccessful()) {
for (final Answer answer : cmds.getAnswers()) {
if (!answer.getResult()) {
s_logger.warn("Failed to expunge vm due to: " + answer.getDetails());
throw new CloudRuntimeException("Unable to expunge " + vm + " due to " + answer.getDetails());
}
}
}
}
if (hostId != null) {
volumeMgr.revokeAccess(vm.getId(), hostId);
}
// Clean up volumes based on the vm's instance id
volumeMgr.cleanupVolumes(vm.getId());
if (hostId != null && CollectionUtils.isNotEmpty(targets)) {
removeDynamicTargets(hostId, targets);
}
final VirtualMachineGuru guru = getVmGuru(vm);
guru.finalizeExpunge(vm);
//remove the overcommit detials from the uservm details
_uservmDetailsDao.removeDetails(vm.getId());
// send hypervisor-dependent commands before removing
final List<Command> finalizeExpungeCommands = hvGuru.finalizeExpunge(vm);
if (finalizeExpungeCommands != null && finalizeExpungeCommands.size() > 0) {
if (hostId != null) {
final Commands cmds = new Commands(Command.OnError.Stop);
for (final Command command : finalizeExpungeCommands) {
cmds.addCommand(command);
}
if (nicExpungeCommands != null) {
for (final Command command : nicExpungeCommands) {
cmds.addCommand(command);
}
}
_agentMgr.send(hostId, cmds);
if (!cmds.isSuccessful()) {
for (final Answer answer : cmds.getAnswers()) {
if (!answer.getResult()) {
s_logger.warn("Failed to expunge vm due to: " + answer.getDetails());
throw new CloudRuntimeException("Unable to expunge " + vm + " due to " + answer.getDetails());
}
}
}
}
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Expunged " + vm);
}
}
private List<Map<String, String>> getTargets(Long hostId, long vmId) {
List<Map<String, String>> targets = new ArrayList<>();
HostVO hostVO = _hostDao.findById(hostId);
if (hostVO == null || hostVO.getHypervisorType() != HypervisorType.VMware) {
return targets;
}
List<VolumeVO> volumes = _volsDao.findByInstance(vmId);
if (CollectionUtils.isEmpty(volumes)) {
return targets;
}
for (VolumeVO volume : volumes) {
StoragePoolVO storagePoolVO = _storagePoolDao.findById(volume.getPoolId());
if (storagePoolVO != null && storagePoolVO.isManaged()) {
Map<String, String> target = new HashMap<>();
target.put(ModifyTargetsCommand.STORAGE_HOST, storagePoolVO.getHostAddress());
target.put(ModifyTargetsCommand.STORAGE_PORT, String.valueOf(storagePoolVO.getPort()));
target.put(ModifyTargetsCommand.IQN, volume.get_iScsiName());
targets.add(target);
}
}
return targets;
}
private void removeDynamicTargets(long hostId, List<Map<String, String>> targets) {
ModifyTargetsCommand cmd = new ModifyTargetsCommand();
cmd.setTargets(targets);
cmd.setApplyToAllHostsInCluster(true);
cmd.setAdd(false);
cmd.setTargetTypeToRemove(ModifyTargetsCommand.TargetTypeToRemove.DYNAMIC);
sendModifyTargetsCommand(cmd, hostId);
}
private void sendModifyTargetsCommand(ModifyTargetsCommand cmd, long hostId) {
Answer answer = _agentMgr.easySend(hostId, cmd);
if (answer == null) {
String msg = "Unable to get an answer to the modify targets command";
s_logger.warn(msg);
}
else if (!answer.getResult()) {
String msg = "Unable to modify target on the following host: " + hostId;
s_logger.warn(msg);
}
}
@Override
public boolean start() {
// TODO, initial delay is hardcoded
_executor.scheduleAtFixedRate(new CleanupTask(), 5, VmJobStateReportInterval.value(), TimeUnit.SECONDS);
_executor.scheduleAtFixedRate(new TransitionTask(), VmOpCleanupInterval.value(), VmOpCleanupInterval.value(), TimeUnit.SECONDS);
cancelWorkItems(_nodeId);
volumeMgr.cleanupStorageJobs();
// cleanup left over place holder works
_workJobDao.expungeLeftoverWorkJobs(ManagementServerNode.getManagementServerId());
return true;
}
@Override
public boolean stop() {
return true;
}
@Override
public boolean configure(final String name, final Map<String, Object> xmlParams) throws ConfigurationException {
ReservationContextImpl.init(_entityMgr);
VirtualMachineProfileImpl.init(_entityMgr);
VmWorkMigrate.init(_entityMgr);
_executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Vm-Operations-Cleanup"));
_nodeId = ManagementServerNode.getManagementServerId();
_agentMgr.registerForHostEvents(this, true, true, true);
_messageBus.subscribe(VirtualMachineManager.Topics.VM_POWER_STATE, MessageDispatcher.getDispatcher(this));
return true;
}
protected VirtualMachineManagerImpl() {
setStateMachine();
}
@Override
public void start(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params) {
start(vmUuid, params, null, null);
}
@Override
public void start(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) {
try {
advanceStart(vmUuid, params, planToDeploy, planner);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Unable to start a VM due to concurrent operation", e).add(VirtualMachine.class, vmUuid);
} catch (final InsufficientCapacityException e) {
throw new CloudRuntimeException("Unable to start a VM due to insufficient capacity", e).add(VirtualMachine.class, vmUuid);
} catch (final ResourceUnavailableException e) {
if(e.getScope() != null && e.getScope().equals(VirtualRouter.class)){
throw new CloudRuntimeException("Network is unavailable. Please contact administrator", e).add(VirtualMachine.class, vmUuid);
}
throw new CloudRuntimeException("Unable to start a VM due to unavailable resources", e).add(VirtualMachine.class, vmUuid);
}
}
protected boolean checkWorkItems(final VMInstanceVO vm, final State state) throws ConcurrentOperationException {
while (true) {
final ItWorkVO vo = _workDao.findByOutstandingWork(vm.getId(), state);
if (vo == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find work for VM: " + vm + " and state: " + state);
}
return true;
}
if (vo.getStep() == Step.Done) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Work for " + vm + " is " + vo.getStep());
}
return true;
}
// also check DB to get latest VM state to detect vm update from concurrent process before idle waiting to get an early exit
final VMInstanceVO instance = _vmDao.findById(vm.getId());
if (instance != null && instance.getState() == State.Running) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM is already started in DB: " + vm);
}
return true;
}
if (vo.getSecondsTaskIsInactive() > VmOpCancelInterval.value()) {
s_logger.warn("The task item for vm " + vm + " has been inactive for " + vo.getSecondsTaskIsInactive());
return false;
}
try {
Thread.sleep(VmOpWaitInterval.value()*1000);
} catch (final InterruptedException e) {
s_logger.info("Waiting for " + vm + " but is interrupted");
throw new ConcurrentOperationException("Waiting for " + vm + " but is interrupted");
}
s_logger.debug("Waiting some more to make sure there's no activity on " + vm);
}
}
@DB
protected Ternary<VMInstanceVO, ReservationContext, ItWorkVO> changeToStartState(final VirtualMachineGuru vmGuru, final VMInstanceVO vm, final User caller,
final Account account) throws ConcurrentOperationException {
final long vmId = vm.getId();
ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Starting, vm.getType(), vm.getId());
int retry = VmOpLockStateRetry.value();
while (retry-- != 0) {
try {
final ItWorkVO workFinal = work;
final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> result =
Transaction.execute(new TransactionCallbackWithException<Ternary<VMInstanceVO, ReservationContext, ItWorkVO>, NoTransitionException>() {
@Override
public Ternary<VMInstanceVO, ReservationContext, ItWorkVO> doInTransaction(final TransactionStatus status) throws NoTransitionException {
final Journal journal = new Journal.LogJournal("Creating " + vm, s_logger);
final ItWorkVO work = _workDao.persist(workFinal);
final ReservationContextImpl context = new ReservationContextImpl(work.getId(), journal, caller, account);
if (stateTransitTo(vm, Event.StartRequested, null, work.getId())) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Successfully transitioned to start state for " + vm + " reservation id = " + work.getId());
}
return new Ternary<VMInstanceVO, ReservationContext, ItWorkVO>(vm, context, work);
}
return new Ternary<VMInstanceVO, ReservationContext, ItWorkVO>(null, null, work);
}
});
work = result.third();
if (result.first() != null) {
return result;
}
} catch (final NoTransitionException e) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to transition into Starting state due to " + e.getMessage());
}
}
final VMInstanceVO instance = _vmDao.findById(vmId);
if (instance == null) {
throw new ConcurrentOperationException("Unable to acquire lock on " + vm);
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Determining why we're unable to update the state to Starting for " + instance + ". Retry=" + retry);
}
final State state = instance.getState();
if (state == State.Running) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM is already started: " + vm);
}
return null;
}
if (state.isTransitional()) {
if (!checkWorkItems(vm, state)) {
throw new ConcurrentOperationException("There are concurrent operations on " + vm);
} else {
continue;
}
}
if (state != State.Stopped) {
s_logger.debug("VM " + vm + " is not in a state to be started: " + state);
return null;
}
}
throw new ConcurrentOperationException("Unable to change the state of " + vm);
}
protected <T extends VMInstanceVO> boolean changeState(final T vm, final Event event, final Long hostId, final ItWorkVO work, final Step step) throws NoTransitionException {
// FIXME: We should do this better.
Step previousStep = null;
if (work != null) {
previousStep = work.getStep();
_workDao.updateStep(work, step);
}
boolean result = false;
try {
result = stateTransitTo(vm, event, hostId);
return result;
} finally {
if (!result && work != null) {
_workDao.updateStep(work, previousStep);
}
}
}
protected boolean areAffinityGroupsAssociated(final VirtualMachineProfile vmProfile) {
final VirtualMachine vm = vmProfile.getVirtualMachine();
final long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
if (vmGroupCount > 0) {
return true;
}
return false;
}
@Override
public void advanceStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlanner planner)
throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
advanceStart(vmUuid, params, null, planner);
}
@Override
public void advanceStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner)
throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if ( jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateStart(vmUuid, params, planToDeploy, planner);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = startVmThroughJobQueue(vmUuid, params, planToDeploy, planner);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof InsufficientCapacityException) {
throw (InsufficientCapacityException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void setupAgentSecurity(final Host vmHost, final Map<String, String> sshAccessDetails, final VirtualMachine vm) throws AgentUnavailableException, OperationTimedoutException {
final String csr = caManager.generateKeyStoreAndCsr(vmHost, sshAccessDetails);
if (!Strings.isNullOrEmpty(csr)) {
final Map<String, String> ipAddressDetails = new HashMap<>(sshAccessDetails);
ipAddressDetails.remove(NetworkElementCommand.ROUTER_NAME);
final Certificate certificate = caManager.issueCertificate(csr, Arrays.asList(vm.getHostName(), vm.getInstanceName()),
new ArrayList<>(ipAddressDetails.values()), CAManager.CertValidityPeriod.value(), null);
final boolean result = caManager.deployCertificate(vmHost, certificate, false, sshAccessDetails);
if (!result) {
s_logger.error("Failed to setup certificate for system vm: " + vm.getInstanceName());
}
} else {
s_logger.error("Failed to setup keystore and generate CSR for system vm: " + vm.getInstanceName());
}
}
@Override
public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner)
throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
final CallContext cctxt = CallContext.current();
final Account account = cctxt.getCallingAccount();
final User caller = cctxt.getCallingUser();
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> start = changeToStartState(vmGuru, vm, caller, account);
if (start == null) {
return;
}
vm = start.first();
final ReservationContext ctx = start.second();
ItWorkVO work = start.third();
VMInstanceVO startedVm = null;
final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId());
DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx);
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("advanceStart: DeploymentPlan is provided, using dcId:" + planToDeploy.getDataCenterId() + ", podId: " + planToDeploy.getPodId() +
", clusterId: " + planToDeploy.getClusterId() + ", hostId: " + planToDeploy.getHostId() + ", poolId: " + planToDeploy.getPoolId());
}
plan =
new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(),
planToDeploy.getPoolId(), planToDeploy.getPhysicalNetworkId(), ctx);
}
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
boolean canRetry = true;
ExcludeList avoids = null;
try {
final Journal journal = start.second().getJournal();
if (planToDeploy != null) {
avoids = planToDeploy.getAvoids();
}
if (avoids == null) {
avoids = new ExcludeList();
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
}
boolean planChangedByVolume = false;
boolean reuseVolume = true;
final DataCenterDeployment originalPlan = plan;
int retry = StartRetry.value();
while (retry-- != 0) { // It's != so that it can match -1.
if (reuseVolume) {
// edit plan if this vm's ROOT volume is in READY state already
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
for (final VolumeVO vol : vols) {
// make sure if the templateId is unchanged. If it is changed,
// let planner
// reassign pool for the volume even if it ready.
final Long volTemplateId = vol.getTemplateId();
if (volTemplateId != null && volTemplateId.longValue() != template.getId()) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(vol + " of " + vm + " is READY, but template ids don't match, let the planner reassign a new pool");
}
continue;
}
final StoragePool pool = (StoragePool)dataStoreMgr.getPrimaryDataStore(vol.getPoolId());
if (!pool.isInMaintenance()) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Root volume is ready, need to place VM in volume's cluster");
}
final long rootVolDcId = pool.getDataCenterId();
final Long rootVolPodId = pool.getPodId();
final Long rootVolClusterId = pool.getClusterId();
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
final Long clusterIdSpecified = planToDeploy.getClusterId();
if (clusterIdSpecified != null && rootVolClusterId != null) {
if (rootVolClusterId.longValue() != clusterIdSpecified.longValue()) {
// cannot satisfy the plan passed in to the
// planner
if (s_logger.isDebugEnabled()) {
s_logger.debug("Cannot satisfy the deployment plan passed in since the ready Root volume is in different cluster. volume's cluster: " +
rootVolClusterId + ", cluster specified: " + clusterIdSpecified);
}
throw new ResourceUnavailableException(
"Root volume is ready in different cluster, Deployment plan provided cannot be satisfied, unable to create a deployment for " +
vm, Cluster.class, clusterIdSpecified);
}
}
plan =
new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(),
planToDeploy.getHostId(), vol.getPoolId(), null, ctx);
} else {
plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId(), null, ctx);
if (s_logger.isDebugEnabled()) {
s_logger.debug(vol + " is READY, changing deployment plan to use this pool's dcId: " + rootVolDcId + " , podId: " + rootVolPodId +
" , and clusterId: " + rootVolClusterId);
}
planChangedByVolume = true;
}
}
}
}
final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params);
DeployDestination dest = null;
try {
dest = _dpMgr.planDeployment(vmProfile, plan, avoids, planner);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
}
if (dest == null) {
if (planChangedByVolume) {
plan = originalPlan;
planChangedByVolume = false;
//do not enter volume reuse for next retry, since we want to look for resources outside the volume's cluster
reuseVolume = false;
continue;
}
throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(),
areAffinityGroupsAssociated(vmProfile));
}
if (dest != null) {
avoids.addHost(dest.getHost().getId());
journal.record("Deployment found ", vmProfile, dest);
}
long destHostId = dest.getHost().getId();
vm.setPodIdToDeployIn(dest.getPod().getId());
final Long cluster_id = dest.getCluster().getId();
final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio");
final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio");
//storing the value of overcommit in the vm_details table for doing a capacity check in case the cluster overcommit ratio is changed.
if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") == null &&
(Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) {
_uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
_uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
} else if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") != null) {
_uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
_uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
}
vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue()));
vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue()));
StartAnswer startAnswer = null;
try {
if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) {
throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine "+vm.getUuid()+" oldstate: "+vm.getState()+ "Event :"+Event.OperationRetry);
}
} catch (final NoTransitionException e1) {
throw new ConcurrentOperationException(e1.getMessage());
}
try {
_networkMgr.prepare(vmProfile, new DeployDestination(dest.getDataCenter(), dest.getPod(), null, null, dest.getStorageForDisks()), ctx);
if (vm.getHypervisorType() != HypervisorType.BareMetal) {
volumeMgr.prepare(vmProfile, dest);
}
//since StorageMgr succeeded in volume creation, reuse Volume for further tries until current cluster has capacity
if (!reuseVolume) {
reuseVolume = true;
}
Commands cmds = null;
vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx);
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
handlePath(vmTO.getDisks(), vm.getHypervisorType());
cmds = new Commands(Command.OnError.Stop);
cmds.addCommand(new StartCommand(vmTO, dest.getHost(), getExecuteInSequence(vm.getHypervisorType())));
vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx);
work = _workDao.findById(work.getId());
if (work == null || work.getStep() != Step.Prepare) {
throw new ConcurrentOperationException("Work steps have been changed: " + work);
}
_workDao.updateStep(work, Step.Starting);
_agentMgr.send(destHostId, cmds);
_workDao.updateStep(work, Step.Started);
startAnswer = cmds.getAnswer(StartAnswer.class);
if (startAnswer != null && startAnswer.getResult()) {
handlePath(vmTO.getDisks(), startAnswer.getIqnToData());
final String host_guid = startAnswer.getHost_guid();
if (host_guid != null) {
final HostVO finalHost = _resourceMgr.findHostByGuid(host_guid);
if (finalHost == null) {
throw new CloudRuntimeException("Host Guid " + host_guid + " doesn't exist in DB, something went wrong while processing start answer: "+startAnswer);
}
destHostId = finalHost.getId();
}
if (vmGuru.finalizeStart(vmProfile, destHostId, cmds, ctx)) {
syncDiskChainChange(startAnswer);
if (!changeState(vm, Event.OperationSucceeded, destHostId, work, Step.Done)) {
s_logger.error("Unable to transition to a new state. VM uuid: "+vm.getUuid()+ "VM oldstate:"+vm.getState()+"Event:"+Event.OperationSucceeded);
throw new ConcurrentOperationException("Failed to deploy VM"+ vm.getUuid());
}
// Update GPU device capacity
final GPUDeviceTO gpuDevice = startAnswer.getVirtualMachine().getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(destHostId, gpuDevice.getGroupDetails());
}
// Remove the information on whether it was a deploy vm request.The deployvm=true information
// is set only when the vm is being deployed. When a vm is started from a stop state the
// information isn't set,
if (_uservmDetailsDao.findDetail(vm.getId(), "deployvm") != null) {
_uservmDetailsDao.removeDetail(vm.getId(), "deployvm");
}
startedVm = vm;
if (s_logger.isDebugEnabled()) {
s_logger.debug("Start completed for VM " + vm);
}
final Host vmHost = _hostDao.findById(destHostId);
if (vmHost != null && (VirtualMachine.Type.ConsoleProxy.equals(vm.getType()) ||
VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) && caManager.canProvisionCertificates()) {
final Map<String, String> sshAccessDetails = _networkMgr.getSystemVMAccessDetails(vm);
for (int retries = 3; retries > 0; retries--) {
try {
setupAgentSecurity(vmHost, sshAccessDetails, vm);
return;
} catch (final Exception e) {
s_logger.error("Retrying after catching exception while trying to secure agent for systemvm id=" + vm.getId(), e);
}
}
throw new CloudRuntimeException("Failed to setup and secure agent for systemvm id=" + vm.getId());
}
return;
} else {
if (s_logger.isDebugEnabled()) {
s_logger.info("The guru did not like the answers so stopping " + vm);
}
StopCommand stopCmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
stopCmd.setControlIp(getControlNicIpForVM(vm));
final StopCommand cmd = stopCmd;
final Answer answer = _agentMgr.easySend(destHostId, cmd);
if (answer != null && answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer)answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final Map<String,String> vmmetadata = new HashMap<String,String>();
vmmetadata.put(vm.getInstanceName(), platform);
syncVMMetaData(vmmetadata);
}
}
}
if (answer == null || !answer.getResult()) {
s_logger.warn("Unable to stop " + vm + " due to " + (answer != null ? answer.getDetails() : "no answers"));
_haMgr.scheduleStop(vm, destHostId, WorkType.ForceStop);
throw new ExecutionException("Unable to stop this VM, "+vm.getUuid()+" so we are unable to retry the start operation");
}
throw new ExecutionException("Unable to start VM:"+vm.getUuid()+" due to error in finalizeStart, not retrying");
}
}
s_logger.info("Unable to start VM on " + dest.getHost() + " due to " + (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
break;
}
} catch (OperationTimedoutException e) {
s_logger.debug("Unable to send the start command to host " + dest.getHost()+" failed to start VM: "+vm.getUuid());
if (e.isActive()) {
_haMgr.scheduleStop(vm, destHostId, WorkType.CheckStop);
}
canRetry = false;
throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
} catch (final ResourceUnavailableException e) {
s_logger.info("Unable to contact resource.", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected ResourceUnavailableException : " + e.getScope().getName(), e);
throw e;
}
}
} catch (final InsufficientCapacityException e) {
s_logger.info("Insufficient capacity ", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected InsufficientCapacityException : " + e.getScope().getName(), e);
}
}
} catch (final ExecutionException e) {
s_logger.error("Failed to start instance " + vm, e);
throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
} catch (final NoTransitionException e) {
s_logger.error("Failed to start instance " + vm, e);
throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
} finally {
if (startedVm == null && canRetry) {
final Step prevStep = work.getStep();
_workDao.updateStep(work, Step.Release);
// If previous step was started/ing && we got a valid answer
if ((prevStep == Step.Started || prevStep == Step.Starting) && startAnswer != null && startAnswer.getResult()) { //TODO check the response of cleanup and record it in DB for retry
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false);
} else {
//if step is not starting/started, send cleanup command with force=true
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, true);
}
}
}
}
} finally {
if (startedVm == null) {
if (canRetry) {
try {
changeState(vm, Event.OperationFailed, null, work, Step.Done);
} catch (final NoTransitionException e) {
throw new ConcurrentOperationException(e.getMessage());
}
}
}
if (planToDeploy != null) {
planToDeploy.setAvoids(avoids);
}
}
if (startedVm == null) {
throw new CloudRuntimeException("Unable to start instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
}
}
// for managed storage on KVM, need to make sure the path field of the volume in question is populated with the IQN
private void handlePath(final DiskTO[] disks, final HypervisorType hypervisorType) {
if (hypervisorType != HypervisorType.KVM) {
return;
}
if (disks != null) {
for (final DiskTO disk : disks) {
final Map<String, String> details = disk.getDetails();
final boolean isManaged = details != null && Boolean.parseBoolean(details.get(DiskTO.MANAGED));
if (isManaged && disk.getPath() == null) {
final Long volumeId = disk.getData().getId();
final VolumeVO volume = _volsDao.findById(volumeId);
disk.setPath(volume.get_iScsiName());
if (disk.getData() instanceof VolumeObjectTO) {
final VolumeObjectTO volTo = (VolumeObjectTO)disk.getData();
volTo.setPath(volume.get_iScsiName());
}
volume.setPath(volume.get_iScsiName());
_volsDao.update(volumeId, volume);
}
}
}
}
// for managed storage on XenServer and VMware, need to update the DB with a path if the VDI/VMDK file was newly created
private void handlePath(final DiskTO[] disks, final Map<String, Map<String, String>> iqnToData) {
if (disks != null && iqnToData != null) {
for (final DiskTO disk : disks) {
final Map<String, String> details = disk.getDetails();
final boolean isManaged = details != null && Boolean.parseBoolean(details.get(DiskTO.MANAGED));
if (isManaged) {
final Long volumeId = disk.getData().getId();
final VolumeVO volume = _volsDao.findById(volumeId);
final String iScsiName = volume.get_iScsiName();
boolean update = false;
final Map<String, String> data = iqnToData.get(iScsiName);
if (data != null) {
final String path = data.get(StartAnswer.PATH);
if (path != null) {
volume.setPath(path);
update = true;
}
final String imageFormat = data.get(StartAnswer.IMAGE_FORMAT);
if (imageFormat != null) {
volume.setFormat(ImageFormat.valueOf(imageFormat));
update = true;
}
if (update) {
_volsDao.update(volumeId, volume);
}
}
}
}
}
}
private void syncDiskChainChange(final StartAnswer answer) {
final VirtualMachineTO vmSpec = answer.getVirtualMachine();
for (final DiskTO disk : vmSpec.getDisks()) {
if (disk.getType() != Volume.Type.ISO) {
final VolumeObjectTO vol = (VolumeObjectTO)disk.getData();
final VolumeVO volume = _volsDao.findById(vol.getId());
// Use getPath() from VolumeVO to get a fresh copy of what's in the DB.
// Before doing this, in a certain situation, getPath() from VolumeObjectTO
// returned null instead of an actual path (because it was out of date with the DB).
if(vol.getPath() != null) {
volumeMgr.updateVolumeDiskChain(vol.getId(), vol.getPath(), vol.getChainInfo());
} else {
volumeMgr.updateVolumeDiskChain(vol.getId(), volume.getPath(), vol.getChainInfo());
}
}
}
}
@Override
public void stop(final String vmUuid) throws ResourceUnavailableException {
try {
advanceStop(vmUuid, false);
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Unable to stop vm because the operation to stop timed out", e.getAgentId(), e);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Unable to stop vm because of a concurrent operation", e);
}
}
@Override
public void stopForced(String vmUuid) throws ResourceUnavailableException {
try {
advanceStop(vmUuid, true);
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Unable to stop vm because the operation to stop timed out", e.getAgentId(), e);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Unable to stop vm because of a concurrent operation", e);
}
}
@Override
public boolean getExecuteInSequence(final HypervisorType hypervisorType) {
if (HypervisorType.KVM == hypervisorType || HypervisorType.XenServer == hypervisorType || HypervisorType.Hyperv == hypervisorType || HypervisorType.LXC == hypervisorType) {
return false;
} else if (HypervisorType.VMware == hypervisorType) {
final Boolean fullClone = HypervisorGuru.VmwareFullClone.value();
return fullClone;
} else {
return ExecuteInSequence.value();
}
}
private List<Map<String, String>> getVolumesToDisconnect(VirtualMachine vm) {
List<Map<String, String>> volumesToDisconnect = new ArrayList<>();
List<VolumeVO> volumes = _volsDao.findByInstance(vm.getId());
if (CollectionUtils.isEmpty(volumes)) {
return volumesToDisconnect;
}
for (VolumeVO volume : volumes) {
StoragePoolVO storagePool = _storagePoolDao.findById(volume.getPoolId());
if (storagePool != null && storagePool.isManaged()) {
Map<String, String> info = new HashMap<>(3);
info.put(DiskTO.STORAGE_HOST, storagePool.getHostAddress());
info.put(DiskTO.STORAGE_PORT, String.valueOf(storagePool.getPort()));
info.put(DiskTO.IQN, volume.get_iScsiName());
volumesToDisconnect.add(info);
}
}
return volumesToDisconnect;
}
protected boolean sendStop(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final boolean force, final boolean checkBeforeCleanup) {
final VirtualMachine vm = profile.getVirtualMachine();
StopCommand stpCmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), checkBeforeCleanup);
stpCmd.setControlIp(getControlNicIpForVM(vm));
stpCmd.setVolumesToDisconnect(getVolumesToDisconnect(vm));
final StopCommand stop = stpCmd;
try {
Answer answer = null;
if(vm.getHostId() != null) {
answer = _agentMgr.send(vm.getHostId(), stop);
}
if (answer != null && answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer)answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final UserVmVO userVm = _userVmDao.findById(vm.getId());
_userVmDao.loadDetails(userVm);
userVm.setDetail("platform", platform);
_userVmDao.saveDetails(userVm);
}
}
final GPUDeviceTO gpuDevice = stop.getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(vm.getHostId(), gpuDevice.getGroupDetails());
}
if (!answer.getResult()) {
final String details = answer.getDetails();
s_logger.debug("Unable to stop VM due to " + details);
return false;
}
guru.finalizeStop(profile, answer);
} else {
s_logger.error("Invalid answer received in response to a StopCommand for " + vm.getInstanceName());
return false;
}
} catch (final AgentUnavailableException e) {
if (!force) {
return false;
}
} catch (final OperationTimedoutException e) {
if (!force) {
return false;
}
}
return true;
}
protected boolean cleanup(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final ItWorkVO work, final Event event, final boolean cleanUpEvenIfUnableToStop) {
final VirtualMachine vm = profile.getVirtualMachine();
final State state = vm.getState();
s_logger.debug("Cleaning up resources for the vm " + vm + " in " + state + " state");
try {
if (state == State.Starting) {
if (work != null) {
final Step step = work.getStep();
if (step == Step.Starting && !cleanUpEvenIfUnableToStop) {
s_logger.warn("Unable to cleanup vm " + vm + "; work state is incorrect: " + step);
return false;
}
if (step == Step.Started || step == Step.Starting || step == Step.Release) {
if (vm.getHostId() != null) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
return false;
}
}
}
if (step != Step.Release && step != Step.Prepare && step != Step.Started && step != Step.Starting) {
s_logger.debug("Cleanup is not needed for vm " + vm + "; work state is incorrect: " + step);
return true;
}
} else {
if (vm.getHostId() != null) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
return false;
}
}
}
} else if (state == State.Stopping) {
if (vm.getHostId() != null) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Stopping + " state as a part of cleanup process");
return false;
}
}
} else if (state == State.Migrating) {
if (vm.getHostId() != null) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
return false;
}
}
if (vm.getLastHostId() != null) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
return false;
}
}
} else if (state == State.Running) {
if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
s_logger.warn("Failed to stop vm " + vm + " in " + State.Running + " state as a part of cleanup process");
return false;
}
}
} finally {
try {
_networkMgr.release(profile, cleanUpEvenIfUnableToStop);
s_logger.debug("Successfully released network resources for the vm " + vm);
} catch (final Exception e) {
s_logger.warn("Unable to release some network resources.", e);
}
volumeMgr.release(profile);
s_logger.debug("Successfully cleanued up resources for the vm " + vm + " in " + state + " state");
}
return true;
}
@Override
public void advanceStop(final String vmUuid, final boolean cleanUpEvenIfUnableToStop)
throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateStop(vmUuid, cleanUpEvenIfUnableToStop);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = stopVmThroughJobQueue(vmUuid, cleanUpEvenIfUnableToStop);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof AgentUnavailableException) {
throw (AgentUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof OperationTimedoutException) {
throw (OperationTimedoutException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void orchestrateStop(final String vmUuid, final boolean cleanUpEvenIfUnableToStop) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
advanceStop(vm, cleanUpEvenIfUnableToStop);
}
private void advanceStop(final VMInstanceVO vm, final boolean cleanUpEvenIfUnableToStop) throws AgentUnavailableException, OperationTimedoutException,
ConcurrentOperationException {
final State state = vm.getState();
if (state == State.Stopped) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM is already stopped: " + vm);
}
return;
}
if (state == State.Destroyed || state == State.Expunging || state == State.Error) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Stopped called on " + vm + " but the state is " + state);
}
return;
}
// grab outstanding work item if any
final ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState());
if (work != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Found an outstanding work item for this vm " + vm + " with state:" + vm.getState() + ", work id:" + work.getId());
}
}
final Long hostId = vm.getHostId();
if (hostId == null) {
if (!cleanUpEvenIfUnableToStop) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("HostId is null but this is not a forced stop, cannot stop vm " + vm + " with state:" + vm.getState());
}
throw new CloudRuntimeException("Unable to stop " + vm);
}
try {
stateTransitTo(vm, Event.AgentReportStopped, null, null);
} catch (final NoTransitionException e) {
s_logger.warn(e.getMessage());
}
// mark outstanding work item if any as done
if (work != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Updating work item to Done, id:" + work.getId());
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
return;
} else {
HostVO host = _hostDao.findById(hostId);
if (!cleanUpEvenIfUnableToStop && vm.getState() == State.Running && host.getResourceState() == ResourceState.PrepareForMaintenance) {
s_logger.debug("Host is in PrepareForMaintenance state - Stop VM operation on the VM id: " + vm.getId() + " is not allowed");
throw new CloudRuntimeException("Stop VM operation on the VM id: " + vm.getId() + " is not allowed as host is preparing for maintenance mode");
}
}
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
try {
if (!stateTransitTo(vm, Event.StopRequested, vm.getHostId())) {
throw new ConcurrentOperationException("VM is being operated on.");
}
} catch (final NoTransitionException e1) {
if (!cleanUpEvenIfUnableToStop) {
throw new CloudRuntimeException("We cannot stop " + vm + " when it is in state " + vm.getState());
}
final boolean doCleanup = true;
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to transition the state but we're moving on because it's forced stop");
}
if (doCleanup) {
if (cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.StopRequested, cleanUpEvenIfUnableToStop)) {
try {
if (s_logger.isDebugEnabled() && work != null) {
s_logger.debug("Updating work item to Done, id:" + work.getId());
}
if (!changeState(vm, Event.AgentReportStopped, null, work, Step.Done)) {
throw new CloudRuntimeException("Unable to stop " + vm);
}
} catch (final NoTransitionException e) {
s_logger.warn("Unable to cleanup " + vm);
throw new CloudRuntimeException("Unable to stop " + vm, e);
}
} else {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Failed to cleanup VM: " + vm);
}
throw new CloudRuntimeException("Failed to cleanup " + vm + " , current state " + vm.getState());
}
}
}
if (vm.getState() != State.Stopping) {
throw new CloudRuntimeException("We cannot proceed with stop VM " + vm + " since it is not in 'Stopping' state, current state: " + vm.getState());
}
vmGuru.prepareStop(profile);
final StopCommand stop = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false, cleanUpEvenIfUnableToStop);
stop.setControlIp(getControlNicIpForVM(vm));
boolean stopped = false;
Answer answer = null;
try {
answer = _agentMgr.send(vm.getHostId(), stop);
if (answer != null) {
if (answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer)answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final UserVmVO userVm = _userVmDao.findById(vm.getId());
_userVmDao.loadDetails(userVm);
userVm.setDetail("platform", platform);
_userVmDao.saveDetails(userVm);
}
}
}
stopped = answer.getResult();
if (!stopped) {
throw new CloudRuntimeException("Unable to stop the virtual machine due to " + answer.getDetails());
}
vmGuru.finalizeStop(profile, answer);
final GPUDeviceTO gpuDevice = stop.getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(vm.getHostId(), gpuDevice.getGroupDetails());
}
} else {
throw new CloudRuntimeException("Invalid answer received in response to a StopCommand on " + vm.instanceName);
}
} catch (final AgentUnavailableException e) {
s_logger.warn("Unable to stop vm, agent unavailable: " + e.toString());
} catch (final OperationTimedoutException e) {
s_logger.warn("Unable to stop vm, operation timed out: " + e.toString());
} finally {
if (!stopped) {
if (!cleanUpEvenIfUnableToStop) {
s_logger.warn("Unable to stop vm " + vm);
try {
stateTransitTo(vm, Event.OperationFailed, vm.getHostId());
} catch (final NoTransitionException e) {
s_logger.warn("Unable to transition the state " + vm);
}
throw new CloudRuntimeException("Unable to stop " + vm);
} else {
s_logger.warn("Unable to actually stop " + vm + " but continue with release because it's a force stop");
vmGuru.finalizeStop(profile, answer);
}
}
}
if (s_logger.isDebugEnabled()) {
s_logger.debug(vm + " is stopped on the host. Proceeding to release resource held.");
}
try {
_networkMgr.release(profile, cleanUpEvenIfUnableToStop);
s_logger.debug("Successfully released network resources for the vm " + vm);
} catch (final Exception e) {
s_logger.warn("Unable to release some network resources.", e);
}
try {
if (vm.getHypervisorType() != HypervisorType.BareMetal) {
volumeMgr.release(profile);
s_logger.debug("Successfully released storage resources for the vm " + vm);
}
} catch (final Exception e) {
s_logger.warn("Unable to release storage resources.", e);
}
try {
if (work != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Updating the outstanding work item to Done, id:" + work.getId());
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
if (!stateTransitTo(vm, Event.OperationSucceeded, null)) {
throw new CloudRuntimeException("unable to stop " + vm);
}
} catch (final NoTransitionException e) {
s_logger.warn(e.getMessage());
throw new CloudRuntimeException("Unable to stop " + vm);
}
}
private void setStateMachine() {
_stateMachine = VirtualMachine.State.getStateMachine();
}
protected boolean stateTransitTo(final VMInstanceVO vm, final VirtualMachine.Event e, final Long hostId, final String reservationId) throws NoTransitionException {
// if there are active vm snapshots task, state change is not allowed
// Disable this hacking thing, VM snapshot task need to be managed by its orchestartion flow istelf instead of
// hacking it here at general VM manager
/*
if (_vmSnapshotMgr.hasActiveVMSnapshotTasks(vm.getId())) {
s_logger.error("State transit with event: " + e + " failed due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
return false;
}
*/
vm.setReservationId(reservationId);
return _stateMachine.transitTo(vm, e, new Pair<Long, Long>(vm.getHostId(), hostId), _vmDao);
}
@Override
public boolean stateTransitTo(final VirtualMachine vm1, final VirtualMachine.Event e, final Long hostId) throws NoTransitionException {
final VMInstanceVO vm = (VMInstanceVO)vm1;
/*
* Remove the hacking logic here.
// if there are active vm snapshots task, state change is not allowed
if (_vmSnapshotMgr.hasActiveVMSnapshotTasks(vm.getId())) {
s_logger.error("State transit with event: " + e + " failed due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
return false;
}
*/
final State oldState = vm.getState();
if (oldState == State.Starting) {
if (e == Event.OperationSucceeded) {
vm.setLastHostId(hostId);
}
} else if (oldState == State.Stopping) {
if (e == Event.OperationSucceeded) {
vm.setLastHostId(vm.getHostId());
}
}
return _stateMachine.transitTo(vm, e, new Pair<Long, Long>(vm.getHostId(), hostId), _vmDao);
}
@Override
public void destroy(final String vmUuid, final boolean expunge) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException {
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
if (vm == null || vm.getState() == State.Destroyed || vm.getState() == State.Expunging || vm.getRemoved() != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find vm or vm is destroyed: " + vm);
}
return;
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Destroying vm " + vm + ", expunge flag " + (expunge ? "on" : "off"));
}
advanceStop(vmUuid, VmDestroyForcestop.value());
deleteVMSnapshots(vm, expunge);
Transaction.execute(new TransactionCallbackWithExceptionNoReturn<CloudRuntimeException>() {
public void doInTransactionWithoutResult(final TransactionStatus status) throws CloudRuntimeException {
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
try {
if (!stateTransitTo(vm, VirtualMachine.Event.DestroyRequested, vm.getHostId())) {
s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm);
throw new CloudRuntimeException("Unable to destroy " + vm);
} else {
if (expunge) {
if (!stateTransitTo(vm, VirtualMachine.Event.ExpungeOperation, vm.getHostId())) {
s_logger.debug("Unable to expunge the vm because it is not in the correct state: " + vm);
throw new CloudRuntimeException("Unable to expunge " + vm);
}
}
}
} catch (final NoTransitionException e) {
s_logger.debug(e.getMessage());
throw new CloudRuntimeException("Unable to destroy " + vm, e);
}
}
});
}
/**
* Delete vm snapshots depending on vm's hypervisor type. For Vmware, vm snapshots removal is delegated to vm cleanup thread
* to reduce tasks sent to hypervisor (one tasks to delete vm snapshots and vm itself
* instead of one task for each vm snapshot plus another for the vm)
* @param vm vm
* @param expunge indicates if vm should be expunged
*/
private void deleteVMSnapshots(VMInstanceVO vm, boolean expunge) {
if (! vm.getHypervisorType().equals(HypervisorType.VMware)) {
if (!_vmSnapshotMgr.deleteAllVMSnapshots(vm.getId(), null)) {
s_logger.debug("Unable to delete all snapshots for " + vm);
throw new CloudRuntimeException("Unable to delete vm snapshots for " + vm);
}
}
else {
if (expunge) {
_vmSnapshotMgr.deleteVMSnapshotsFromDB(vm.getId());
}
}
}
protected boolean checkVmOnHost(final VirtualMachine vm, final long hostId) throws AgentUnavailableException, OperationTimedoutException {
final Answer answer = _agentMgr.send(hostId, new CheckVirtualMachineCommand(vm.getInstanceName()));
if (answer == null || !answer.getResult()) {
return false;
}
if (answer instanceof CheckVirtualMachineAnswer) {
final CheckVirtualMachineAnswer vmAnswer = (CheckVirtualMachineAnswer)answer;
if (vmAnswer.getState() == PowerState.PowerOff) {
return false;
}
}
UserVmVO userVm = _userVmDao.findById(vm.getId());
if (userVm != null) {
List<VMSnapshotVO> vmSnapshots = _vmSnapshotDao.findByVm(vm.getId());
RestoreVMSnapshotCommand command = _vmSnapshotMgr.createRestoreCommand(userVm, vmSnapshots);
if (command != null) {
RestoreVMSnapshotAnswer restoreVMSnapshotAnswer = (RestoreVMSnapshotAnswer) _agentMgr.send(hostId, command);
if (restoreVMSnapshotAnswer == null || !restoreVMSnapshotAnswer.getResult()) {
s_logger.warn("Unable to restore the vm snapshot from image file after live migration of vm with vmsnapshots: " + restoreVMSnapshotAnswer.getDetails());
}
}
}
return true;
}
@Override
public void storageMigration(final String vmUuid, final StoragePool destPool) {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateStorageMigration(vmUuid, destPool);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = migrateVmStorageThroughJobQueue(vmUuid, destPool);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void orchestrateStorageMigration(final String vmUuid, final StoragePool destPool) {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
if (destPool == null) {
throw new CloudRuntimeException("Unable to migrate vm: missing destination storage pool");
}
try {
stateTransitTo(vm, VirtualMachine.Event.StorageMigrationRequested, null);
} catch (final NoTransitionException e) {
s_logger.debug("Unable to migrate vm: " + e.toString());
throw new CloudRuntimeException("Unable to migrate vm: " + e.toString());
}
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
boolean migrationResult = false;
try {
migrationResult = volumeMgr.storageMigration(profile, destPool);
if (migrationResult) {
//if the vm is migrated to different pod in basic mode, need to reallocate ip
if (destPool.getPodId() != null && !destPool.getPodId().equals(vm.getPodIdToDeployIn())) {
final DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), destPool.getPodId(), null, null, null, null);
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, null, null, null, null);
_networkMgr.reallocate(vmProfile, plan);
}
//when start the vm next time, don;'t look at last_host_id, only choose the host based on volume/storage pool
vm.setLastHostId(null);
vm.setPodIdToDeployIn(destPool.getPodId());
// If VM was cold migrated between clusters belonging to two different VMware DCs,
// unregister the VM from the source host and cleanup the associated VM files.
if (vm.getHypervisorType().equals(HypervisorType.VMware)) {
Long srcClusterId = null;
Long srcHostId = vm.getHostId() != null ? vm.getHostId() : vm.getLastHostId();
if (srcHostId != null) {
HostVO srcHost = _hostDao.findById(srcHostId);
srcClusterId = srcHost.getClusterId();
}
final Long destClusterId = destPool.getClusterId();
if (srcClusterId != null && destClusterId != null && ! srcClusterId.equals(destClusterId)) {
final String srcDcName = _clusterDetailsDao.getVmwareDcName(srcClusterId);
final String destDcName = _clusterDetailsDao.getVmwareDcName(destClusterId);
if (srcDcName != null && destDcName != null && !srcDcName.equals(destDcName)) {
s_logger.debug("Since VM's storage was successfully migrated across VMware Datacenters, unregistering VM: " + vm.getInstanceName() +
" from source host: " + srcHostId);
final UnregisterVMCommand uvc = new UnregisterVMCommand(vm.getInstanceName());
uvc.setCleanupVmFiles(true);
try {
_agentMgr.send(srcHostId, uvc);
} catch (final AgentUnavailableException | OperationTimedoutException e) {
throw new CloudRuntimeException("Failed to unregister VM: " + vm.getInstanceName() + " from source host: " + srcHostId +
" after successfully migrating VM's storage across VMware Datacenters");
}
}
}
}
} else {
s_logger.debug("Storage migration failed");
}
} catch (final ConcurrentOperationException e) {
s_logger.debug("Failed to migration: " + e.toString());
throw new CloudRuntimeException("Failed to migration: " + e.toString());
} catch (final InsufficientVirtualNetworkCapacityException e) {
s_logger.debug("Failed to migration: " + e.toString());
throw new CloudRuntimeException("Failed to migration: " + e.toString());
} catch (final InsufficientAddressCapacityException e) {
s_logger.debug("Failed to migration: " + e.toString());
throw new CloudRuntimeException("Failed to migration: " + e.toString());
} catch (final InsufficientCapacityException e) {
s_logger.debug("Failed to migration: " + e.toString());
throw new CloudRuntimeException("Failed to migration: " + e.toString());
} catch (final StorageUnavailableException e) {
s_logger.debug("Failed to migration: " + e.toString());
throw new CloudRuntimeException("Failed to migration: " + e.toString());
} finally {
try {
stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null);
} catch (final NoTransitionException e) {
s_logger.debug("Failed to change vm state: " + e.toString());
throw new CloudRuntimeException("Failed to change vm state: " + e.toString());
}
}
}
@Override
public void migrate(final String vmUuid, final long srcHostId, final DeployDestination dest)
throws ResourceUnavailableException, ConcurrentOperationException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateMigrate(vmUuid, srcHostId, dest);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = migrateVmThroughJobQueue(vmUuid, srcHostId, dest);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void orchestrateMigrate(final String vmUuid, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
if (vm == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find the vm " + vmUuid);
}
throw new CloudRuntimeException("Unable to find a virtual machine with id " + vmUuid);
}
migrate(vm, srcHostId, dest);
}
protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
s_logger.info("Migrating " + vm + " to " + dest);
final long dstHostId = dest.getHost().getId();
final Host fromHost = _hostDao.findById(srcHostId);
if (fromHost == null) {
s_logger.info("Unable to find the host to migrate from: " + srcHostId);
throw new CloudRuntimeException("Unable to find the host to migrate from: " + srcHostId);
}
if (fromHost.getClusterId().longValue() != dest.getCluster().getId()) {
final List<VolumeVO> volumes = _volsDao.findCreatedByInstance(vm.getId());
for (final VolumeVO volume : volumes) {
if (!_storagePoolDao.findById(volume.getPoolId()).getScope().equals(ScopeType.ZONE)) {
s_logger.info("Source and destination host are not in same cluster and all volumes are not on zone wide primary store, unable to migrate to host: "
+ dest.getHost().getId());
throw new CloudRuntimeException(
"Source and destination host are not in same cluster and all volumes are not on zone wide primary store, unable to migrate to host: "
+ dest.getHost().getId());
}
}
}
final VirtualMachineGuru vmGuru = getVmGuru(vm);
if (vm.getState() != State.Running) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM is not Running, unable to migrate the vm " + vm);
}
throw new CloudRuntimeException("VM is not Running, unable to migrate the vm currently " + vm + " , current state: " + vm.getState().toString());
}
AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM_MIGRATE;
if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
} else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
}
final VirtualMachineProfile vmSrc = new VirtualMachineProfileImpl(vm);
for (final NicProfile nic : _networkMgr.getNicProfiles(vm)) {
vmSrc.addNic(nic);
}
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, _offeringDao.findById(vm.getId(), vm.getServiceOfferingId()), null, null);
_networkMgr.prepareNicForMigration(profile, dest);
volumeMgr.prepareForMigration(profile, dest);
profile.setConfigDriveLabel(VmConfigDriveLabel.value());
final VirtualMachineTO to = toVmTO(profile);
final PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to);
ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId());
work.setStep(Step.Prepare);
work.setResourceType(ItWorkVO.ResourceType.Host);
work.setResourceId(dstHostId);
work = _workDao.persist(work);
Answer pfma = null;
try {
pfma = _agentMgr.send(dstHostId, pfmc);
if (pfma == null || !pfma.getResult()) {
final String details = pfma != null ? pfma.getDetails() : "null answer returned";
final String msg = "Unable to prepare for migration due to " + details;
pfma = null;
throw new AgentUnavailableException(msg, dstHostId);
}
} catch (final OperationTimedoutException e1) {
throw new AgentUnavailableException("Operation timed out", dstHostId);
} finally {
if (pfma == null) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
vm.setLastHostId(srcHostId);
try {
if (vm == null || vm.getHostId() == null || vm.getHostId() != srcHostId || !changeState(vm, Event.MigrationRequested, dstHostId, work, Step.Migrating)) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
s_logger.info("Migration cancelled because state has changed: " + vm);
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
}
} catch (final NoTransitionException e1) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
s_logger.info("Migration cancelled because " + e1.getMessage());
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
}
boolean migrated = false;
try {
final boolean isWindows = _guestOsCategoryDao.findById(_guestOsDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
final MigrateCommand mc = new MigrateCommand(vm.getInstanceName(), dest.getHost().getPrivateIpAddress(), isWindows, to, getExecuteInSequence(vm.getHypervisorType()));
String autoConvergence = _configDao.getValue(Config.KvmAutoConvergence.toString());
boolean kvmAutoConvergence = Boolean.parseBoolean(autoConvergence);
mc.setAutoConvergence(kvmAutoConvergence);
mc.setHostGuid(dest.getHost().getGuid());
try {
final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
if (ma == null || !ma.getResult()) {
final String details = ma != null ? ma.getDetails() : "null answer returned";
throw new CloudRuntimeException(details);
}
} catch (final OperationTimedoutException e) {
if (e.isActive()) {
s_logger.warn("Active migration command so scheduling a restart for " + vm);
_haMgr.scheduleRestart(vm, true);
}
throw new AgentUnavailableException("Operation timed out on migrating " + vm, dstHostId);
}
try {
if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started)) {
throw new ConcurrentOperationException("Unable to change the state for " + vm);
}
} catch (final NoTransitionException e1) {
throw new ConcurrentOperationException("Unable to change state due to " + e1.getMessage());
}
try {
if (!checkVmOnHost(vm, dstHostId)) {
s_logger.error("Unable to complete migration for " + vm);
try {
_agentMgr.send(srcHostId, new Commands(cleanup(vm)), null);
} catch (final AgentUnavailableException e) {
s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId);
}
cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true);
throw new CloudRuntimeException("Unable to complete migration for " + vm);
}
} catch (final OperationTimedoutException e) {
s_logger.debug("Error while checking the vm " + vm + " on host " + dstHostId, e);
}
migrated = true;
} finally {
if (!migrated) {
s_logger.info("Migration was unsuccessful. Cleaning up: " + vm);
_networkMgr.rollbackNicForMigration(vmSrc, profile);
_alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(),
"Unable to migrate vm " + vm.getInstanceName() + " from host " + fromHost.getName() + " in zone " + dest.getDataCenter().getName() + " and pod " +
dest.getPod().getName(), "Migrate Command failed. Please check logs.");
try {
_agentMgr.send(dstHostId, new Commands(cleanup(vm)), null);
} catch (final AgentUnavailableException ae) {
s_logger.info("Looks like the destination Host is unavailable for cleanup");
}
try {
stateTransitTo(vm, Event.OperationFailed, srcHostId);
} catch (final NoTransitionException e) {
s_logger.warn(e.getMessage());
}
} else {
_networkMgr.commitNicForMigration(vmSrc, profile);
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
private Map<Volume, StoragePool> getPoolListForVolumesForMigration(final VirtualMachineProfile profile, final Host host, final Map<Long, Long> volumeToPool) {
final List<VolumeVO> allVolumes = _volsDao.findUsableVolumesForInstance(profile.getId());
final Map<Volume, StoragePool> volumeToPoolObjectMap = new HashMap<>();
for (final VolumeVO volume : allVolumes) {
final Long poolId = volumeToPool.get(volume.getId());
final StoragePoolVO destPool = _storagePoolDao.findById(poolId);
final StoragePoolVO currentPool = _storagePoolDao.findById(volume.getPoolId());
final DiskOfferingVO diskOffering = _diskOfferingDao.findById(volume.getDiskOfferingId());
if (destPool != null) {
if (currentPool.isManaged()) {
if (destPool.getId() == currentPool.getId()) {
volumeToPoolObjectMap.put(volume, currentPool);
}
else {
throw new CloudRuntimeException("Currently, a volume on managed storage can only be 'migrated' to itself.");
}
}
else {
// Check if pool is accessible from the destination host and disk offering with which the volume was
// created is compliant with the pool type.
if (_poolHostDao.findByPoolHost(destPool.getId(), host.getId()) == null || destPool.isLocal() != diskOffering.getUseLocalStorage()) {
// Cannot find a pool for the volume. Throw an exception.
throw new CloudRuntimeException("Cannot migrate volume " + volume + " to storage pool " + destPool + " while migrating vm to host " + host +
". Either the pool is not accessible from the host or because of the offering with which the volume is created it cannot be placed on " +
"the given pool.");
} else if (destPool.getId() == currentPool.getId()) {
// If the pool to migrate to is the same as current pool, the volume doesn't need to be migrated.
} else {
volumeToPoolObjectMap.put(volume, destPool);
}
}
} else {
if (currentPool.isManaged()) {
if (currentPool.getScope() == ScopeType.ZONE) {
volumeToPoolObjectMap.put(volume, currentPool);
}
else {
throw new CloudRuntimeException("Currently, you can only 'migrate' a volume on managed storage if its storage pool is zone wide.");
}
} else {
// Find a suitable pool for the volume. Call the storage pool allocator to find the list of pools.
final DiskProfile diskProfile = new DiskProfile(volume, diskOffering, profile.getHypervisorType());
final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(),
host.getId(), null, null);
final List<StoragePool> poolList = new ArrayList<>();
final ExcludeList avoid = new ExcludeList();
for (final StoragePoolAllocator allocator : _storagePoolAllocators) {
final List<StoragePool> poolListFromAllocator = allocator.allocateToPool(diskProfile, profile, plan, avoid, StoragePoolAllocator.RETURN_UPTO_ALL);
if (poolListFromAllocator != null && !poolListFromAllocator.isEmpty()) {
poolList.addAll(poolListFromAllocator);
}
}
boolean currentPoolAvailable = false;
if (poolList != null && !poolList.isEmpty()) {
// Volume needs to be migrated. Pick the first pool from the list. Add a mapping to migrate the
// volume to a pool only if it is required; that is the current pool on which the volume resides
// is not available on the destination host.
final Iterator<StoragePool> iter = poolList.iterator();
while (iter.hasNext()) {
if (currentPool.getId() == iter.next().getId()) {
currentPoolAvailable = true;
break;
}
}
if (!currentPoolAvailable) {
volumeToPoolObjectMap.put(volume, _storagePoolDao.findByUuid(poolList.get(0).getUuid()));
}
}
if (!currentPoolAvailable && !volumeToPoolObjectMap.containsKey(volume)) {
// Cannot find a pool for the volume. Throw an exception.
throw new CloudRuntimeException("Cannot find a storage pool which is available for volume " + volume + " while migrating virtual machine " +
profile.getVirtualMachine() + " to host " + host);
}
}
}
}
return volumeToPoolObjectMap;
}
private <T extends VMInstanceVO> void moveVmToMigratingState(final T vm, final Long hostId, final ItWorkVO work) throws ConcurrentOperationException {
// Put the vm in migrating state.
try {
if (!changeState(vm, Event.MigrationRequested, hostId, work, Step.Migrating)) {
s_logger.info("Migration cancelled because state has changed: " + vm);
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
}
} catch (final NoTransitionException e) {
s_logger.info("Migration cancelled because " + e.getMessage());
throw new ConcurrentOperationException("Migration cancelled because " + e.getMessage());
}
}
private <T extends VMInstanceVO> void moveVmOutofMigratingStateOnSuccess(final T vm, final Long hostId, final ItWorkVO work) throws ConcurrentOperationException {
// Put the vm in running state.
try {
if (!changeState(vm, Event.OperationSucceeded, hostId, work, Step.Started)) {
s_logger.error("Unable to change the state for " + vm);
throw new ConcurrentOperationException("Unable to change the state for " + vm);
}
} catch (final NoTransitionException e) {
s_logger.error("Unable to change state due to " + e.getMessage());
throw new ConcurrentOperationException("Unable to change state due to " + e.getMessage());
}
}
@Override
public void migrateWithStorage(final String vmUuid, final long srcHostId, final long destHostId, final Map<Long, Long> volumeToPool)
throws ResourceUnavailableException, ConcurrentOperationException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateMigrateWithStorage(vmUuid, srcHostId, destHostId, volumeToPool);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = migrateVmWithStorageThroughJobQueue(vmUuid, srcHostId, destHostId, volumeToPool);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobException = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobException != null) {
if (jobException instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobException;
} else if (jobException instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobException;
} else if (jobException instanceof RuntimeException) {
throw (RuntimeException)jobException;
} else if (jobException instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobException);
}
}
}
}
private void orchestrateMigrateWithStorage(final String vmUuid, final long srcHostId, final long destHostId, final Map<Long, Long> volumeToPool) throws ResourceUnavailableException,
ConcurrentOperationException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final HostVO srcHost = _hostDao.findById(srcHostId);
final HostVO destHost = _hostDao.findById(destHostId);
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final DataCenterVO dc = _dcDao.findById(destHost.getDataCenterId());
final HostPodVO pod = _podDao.findById(destHost.getPodId());
final Cluster cluster = _clusterDao.findById(destHost.getClusterId());
final DeployDestination destination = new DeployDestination(dc, pod, cluster, destHost);
// Create a map of which volume should go in which storage pool.
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
final Map<Volume, StoragePool> volumeToPoolMap = getPoolListForVolumesForMigration(profile, destHost, volumeToPool);
// If none of the volumes have to be migrated, fail the call. Administrator needs to make a call for migrating
// a vm and not migrating a vm with storage.
if (volumeToPoolMap == null || volumeToPoolMap.isEmpty()) {
throw new InvalidParameterValueException("Migration of the vm " + vm + "from host " + srcHost + " to destination host " + destHost +
" doesn't involve migrating the volumes.");
}
AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM_MIGRATE;
if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
} else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
}
_networkMgr.prepareNicForMigration(profile, destination);
volumeMgr.prepareForMigration(profile, destination);
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
final VirtualMachineTO to = hvGuru.implement(profile);
ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId());
work.setStep(Step.Prepare);
work.setResourceType(ItWorkVO.ResourceType.Host);
work.setResourceId(destHostId);
work = _workDao.persist(work);
// Put the vm in migrating state.
vm.setLastHostId(srcHostId);
vm.setPodIdToDeployIn(destHost.getPodId());
moveVmToMigratingState(vm, destHostId, work);
boolean migrated = false;
try {
// config drive: Detach the config drive at source host
// After migration successful attach the config drive in destination host
// On migration failure VM will be stopped, So configIso will be deleted
Nic defaultNic = _networkModel.getDefaultNic(vm.getId());
List<String[]> vmData = null;
if (defaultNic != null) {
UserVmVO userVm = _userVmDao.findById(vm.getId());
Map<String, String> details = _vmDetailsDao.listDetailsKeyPairs(vm.getId());
userVm.setDetails(details);
Network network = _networkModel.getNetwork(defaultNic.getNetworkId());
if (_networkModel.isSharedNetworkWithoutServices(network.getId())) {
final String serviceOffering = _serviceOfferingDao.findByIdIncludingRemoved(vm.getId(), vm.getServiceOfferingId()).getDisplayText();
boolean isWindows = _guestOSCategoryDao.findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
vmData = _networkModel.generateVmData(userVm.getUserData(), serviceOffering, vm.getDataCenterId(), vm.getInstanceName(), vm.getHostName(), vm.getId(),
vm.getUuid(), defaultNic.getMacAddress(), userVm.getDetail("SSH.PublicKey"), (String) profile.getParameter(VirtualMachineProfile.Param.VmPassword), isWindows);
String vmName = vm.getInstanceName();
String configDriveIsoRootFolder = "/tmp";
String isoFile = configDriveIsoRootFolder + "/" + vmName + "/configDrive/" + vmName + ".iso";
profile.setVmData(vmData);
profile.setConfigDriveLabel(VmConfigDriveLabel.value());
profile.setConfigDriveIsoRootFolder(configDriveIsoRootFolder);
profile.setConfigDriveIsoFile(isoFile);
// At source host detach the config drive iso.
AttachOrDettachConfigDriveCommand dettachCommand = new AttachOrDettachConfigDriveCommand(vm.getInstanceName(), vmData, VmConfigDriveLabel.value(), false);
try {
_agentMgr.send(srcHost.getId(), dettachCommand);
s_logger.debug("Deleted config drive ISO for vm " + vm.getInstanceName() + " In host " + srcHost);
} catch (OperationTimedoutException e) {
s_logger.debug("TIme out occured while exeuting command AttachOrDettachConfigDrive " + e.getMessage());
}
}
}
// Migrate the vm and its volume.
volumeMgr.migrateVolumes(vm, to, srcHost, destHost, volumeToPoolMap);
// Put the vm back to running state.
moveVmOutofMigratingStateOnSuccess(vm, destHost.getId(), work);
try {
if (!checkVmOnHost(vm, destHostId)) {
s_logger.error("Vm not found on destination host. Unable to complete migration for " + vm);
try {
_agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null);
} catch (final AgentUnavailableException e) {
s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId);
}
cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true);
throw new CloudRuntimeException("VM not found on desintation host. Unable to complete migration for " + vm);
}
} catch (final OperationTimedoutException e) {
s_logger.warn("Error while checking the vm " + vm + " is on host " + destHost, e);
}
migrated = true;
} finally {
if (!migrated) {
s_logger.info("Migration was unsuccessful. Cleaning up: " + vm);
_alertMgr.sendAlert(alertType, srcHost.getDataCenterId(), srcHost.getPodId(),
"Unable to migrate vm " + vm.getInstanceName() + " from host " + srcHost.getName() + " in zone " + dc.getName() + " and pod " + dc.getName(),
"Migrate Command failed. Please check logs.");
try {
_agentMgr.send(destHostId, new Commands(cleanup(vm.getInstanceName())), null);
vm.setPodIdToDeployIn(srcHost.getPodId());
stateTransitTo(vm, Event.OperationFailed, srcHostId);
} catch (final AgentUnavailableException e) {
s_logger.warn("Looks like the destination Host is unavailable for cleanup.", e);
} catch (final NoTransitionException e) {
s_logger.error("Error while transitioning vm from migrating to running state.", e);
}
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
@Override
public VirtualMachineTO toVmTO(final VirtualMachineProfile profile) {
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(profile.getVirtualMachine().getHypervisorType());
final VirtualMachineTO to = hvGuru.implement(profile);
return to;
}
protected void cancelWorkItems(final long nodeId) {
final GlobalLock scanLock = GlobalLock.getInternLock("vmmgr.cancel.workitem");
try {
if (scanLock.lock(3)) {
try {
final List<ItWorkVO> works = _workDao.listWorkInProgressFor(nodeId);
for (final ItWorkVO work : works) {
s_logger.info("Handling unfinished work item: " + work);
try {
final VMInstanceVO vm = _vmDao.findById(work.getInstanceId());
if (vm != null) {
if (work.getType() == State.Starting) {
_haMgr.scheduleRestart(vm, true);
work.setManagementServerId(_nodeId);
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
} else if (work.getType() == State.Stopping) {
_haMgr.scheduleStop(vm, vm.getHostId(), WorkType.CheckStop);
work.setManagementServerId(_nodeId);
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
} else if (work.getType() == State.Migrating) {
_haMgr.scheduleMigration(vm);
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
} catch (final Exception e) {
s_logger.error("Error while handling " + work, e);
}
}
} finally {
scanLock.unlock();
}
}
} finally {
scanLock.releaseRef();
}
}
@Override
public void migrateAway(final String vmUuid, final long srcHostId) throws InsufficientServerCapacityException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
try {
orchestrateMigrateAway(vmUuid, srcHostId, null);
} catch (final InsufficientServerCapacityException e) {
s_logger.warn("Failed to deploy vm " + vmUuid + " with original planner, sending HAPlanner");
orchestrateMigrateAway(vmUuid, srcHostId, _haMgr.getHAPlanner());
}
} finally {
_workJobDao.expunge(placeHolder.getId());
}
} else {
final Outcome<VirtualMachine> outcome = migrateVmAwayThroughJobQueue(vmUuid, srcHostId);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobException = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobException != null) {
if (jobException instanceof InsufficientServerCapacityException) {
throw (InsufficientServerCapacityException)jobException;
} else if (jobException instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobException;
} else if (jobException instanceof RuntimeException) {
throw (RuntimeException)jobException;
} else if (jobException instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobException);
}
}
}
}
private void orchestrateMigrateAway(final String vmUuid, final long srcHostId, final DeploymentPlanner planner) throws InsufficientServerCapacityException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
if (vm == null) {
s_logger.debug("Unable to find a VM for " + vmUuid);
throw new CloudRuntimeException("Unable to find " + vmUuid);
}
ServiceOfferingVO offeringVO = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, offeringVO, null, null);
final Long hostId = vm.getHostId();
if (hostId == null) {
s_logger.debug("Unable to migrate because the VM doesn't have a host id: " + vm);
throw new CloudRuntimeException("Unable to migrate " + vmUuid);
}
final Host host = _hostDao.findById(hostId);
Long poolId = null;
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
for (final VolumeVO rootVolumeOfVm : vols) {
final StoragePoolVO rootDiskPool = _storagePoolDao.findById(rootVolumeOfVm.getPoolId());
if (rootDiskPool != null) {
poolId = rootDiskPool.getId();
}
}
final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, poolId, null);
final ExcludeList excludes = new ExcludeList();
excludes.addHost(hostId);
DeployDestination dest = null;
while (true) {
try {
dest = _dpMgr.planDeployment(profile, plan, excludes, planner);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
}
if (dest != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Found destination " + dest + " for migrating to.");
}
} else {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find destination for migrating the vm " + profile);
}
throw new InsufficientServerCapacityException("Unable to find a server to migrate to.", host.getClusterId());
}
excludes.addHost(dest.getHost().getId());
try {
migrate(vm, srcHostId, dest);
return;
} catch (final ResourceUnavailableException e) {
s_logger.debug("Unable to migrate to unavailable " + dest);
} catch (final ConcurrentOperationException e) {
s_logger.debug("Unable to migrate VM due to: " + e.getMessage());
}
try {
advanceStop(vmUuid, true);
throw new CloudRuntimeException("Unable to migrate " + vm);
} catch (final ResourceUnavailableException e) {
s_logger.debug("Unable to stop VM due to " + e.getMessage());
throw new CloudRuntimeException("Unable to migrate " + vm);
} catch (final ConcurrentOperationException e) {
s_logger.debug("Unable to stop VM due to " + e.getMessage());
throw new CloudRuntimeException("Unable to migrate " + vm);
} catch (final OperationTimedoutException e) {
s_logger.debug("Unable to stop VM due to " + e.getMessage());
throw new CloudRuntimeException("Unable to migrate " + vm);
}
}
}
protected class CleanupTask extends ManagedContextRunnable {
@Override
protected void runInContext() {
s_logger.trace("VM Operation Thread Running");
try {
_workDao.cleanup(VmOpCleanupWait.value());
final Date cutDate = new Date(DateUtil.currentGMTTime().getTime() - VmOpCleanupInterval.value() * 1000);
_workJobDao.expungeCompletedWorkJobs(cutDate);
} catch (final Exception e) {
s_logger.error("VM Operations failed due to ", e);
}
}
}
@Override
public boolean isVirtualMachineUpgradable(final VirtualMachine vm, final ServiceOffering offering) {
boolean isMachineUpgradable = true;
for (final HostAllocator allocator : hostAllocators) {
isMachineUpgradable = allocator.isVirtualMachineUpgradable(vm, offering);
if (isMachineUpgradable) {
continue;
} else {
break;
}
}
return isMachineUpgradable;
}
@Override
public void reboot(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params) throws InsufficientCapacityException, ResourceUnavailableException {
try {
advanceReboot(vmUuid, params);
} catch (final ConcurrentOperationException e) {
throw new CloudRuntimeException("Unable to reboot a VM due to concurrent operation", e);
}
}
@Override
public void advanceReboot(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params)
throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if ( jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateReboot(vmUuid, params);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = rebootVmThroughJobQueue(vmUuid, params);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof InsufficientCapacityException) {
throw (InsufficientCapacityException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void orchestrateReboot(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params) throws InsufficientCapacityException, ConcurrentOperationException,
ResourceUnavailableException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
// if there are active vm snapshots task, state change is not allowed
if(_vmSnapshotMgr.hasActiveVMSnapshotTasks(vm.getId())){
s_logger.error("Unable to reboot VM " + vm + " due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
throw new CloudRuntimeException("Unable to reboot VM " + vm + " due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
}
final DataCenter dc = _entityMgr.findById(DataCenter.class, vm.getDataCenterId());
final Host host = _hostDao.findById(vm.getHostId());
if (host == null) {
// Should findById throw an Exception is the host is not found?
throw new CloudRuntimeException("Unable to retrieve host with id " + vm.getHostId());
}
final Cluster cluster = _entityMgr.findById(Cluster.class, host.getClusterId());
final Pod pod = _entityMgr.findById(Pod.class, host.getPodId());
final DeployDestination dest = new DeployDestination(dc, pod, cluster, host);
try {
final Commands cmds = new Commands(Command.OnError.Stop);
cmds.addCommand(new RebootCommand(vm.getInstanceName(), getExecuteInSequence(vm.getHypervisorType())));
_agentMgr.send(host.getId(), cmds);
final Answer rebootAnswer = cmds.getAnswer(RebootAnswer.class);
if (rebootAnswer != null && rebootAnswer.getResult()) {
return;
}
s_logger.info("Unable to reboot VM " + vm + " on " + dest.getHost() + " due to " + (rebootAnswer == null ? " no reboot answer" : rebootAnswer.getDetails()));
} catch (final OperationTimedoutException e) {
s_logger.warn("Unable to send the reboot command to host " + dest.getHost() + " for the vm " + vm + " due to operation timeout", e);
throw new CloudRuntimeException("Failed to reboot the vm on host " + dest.getHost());
}
}
public Command cleanup(final VirtualMachine vm) {
StopCommand cmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
cmd.setControlIp(getControlNicIpForVM(vm));
return cmd;
}
private String getControlNicIpForVM(VirtualMachine vm) {
if (vm.getType() == VirtualMachine.Type.ConsoleProxy || vm.getType() == VirtualMachine.Type.SecondaryStorageVm) {
NicVO nic = _nicsDao.getControlNicForVM(vm.getId());
return nic.getIPv4Address();
} else if (vm.getType() == VirtualMachine.Type.DomainRouter) return vm.getPrivateIpAddress();
else return null;
}
public Command cleanup(final String vmName) {
VirtualMachine vm = _vmDao.findVMByInstanceName(vmName);
StopCommand cmd = new StopCommand(vmName, getExecuteInSequence(null), false);
cmd.setControlIp(getControlNicIpForVM(vm));
return cmd;
}
// this is XenServer specific
public void syncVMMetaData(final Map<String, String> vmMetadatum) {
if (vmMetadatum == null || vmMetadatum.isEmpty()) {
return;
}
List<Pair<Pair<String, VirtualMachine.Type>, Pair<Long, String>>> vmDetails = _userVmDao.getVmsDetailByNames(vmMetadatum.keySet(), "platform");
for (final Map.Entry<String, String> entry : vmMetadatum.entrySet()) {
final String name = entry.getKey();
final String platform = entry.getValue();
if (platform == null || platform.isEmpty()) {
continue;
}
boolean found = false;
for(Pair<Pair<String, VirtualMachine.Type>, Pair<Long, String>> vmDetail : vmDetails ) {
Pair<String, VirtualMachine.Type> vmNameTypePair = vmDetail.first();
if(vmNameTypePair.first().equals(name)) {
found = true;
if(vmNameTypePair.second() == VirtualMachine.Type.User) {
Pair<Long, String> detailPair = vmDetail.second();
String platformDetail = detailPair.second();
if (platformDetail != null && platformDetail.equals(platform)) {
break;
}
updateVmMetaData(detailPair.first(), platform);
}
break;
}
}
if(!found) {
VMInstanceVO vm = _vmDao.findVMByInstanceName(name);
if(vm != null && vm.getType() == VirtualMachine.Type.User) {
updateVmMetaData(vm.getId(), platform);
}
}
}
}
// this is XenServer specific
private void updateVmMetaData(Long vmId, String platform) {
UserVmVO userVm = _userVmDao.findById(vmId);
_userVmDao.loadDetails(userVm);
if ( userVm.details.containsKey("timeoffset")) {
userVm.details.remove("timeoffset");
}
userVm.setDetail("platform", platform);
String pvdriver = "xenserver56";
if ( platform.contains("device_id")) {
pvdriver = "xenserver61";
}
if (!userVm.details.containsKey("hypervisortoolsversion") || !userVm.details.get("hypervisortoolsversion").equals(pvdriver)) {
userVm.setDetail("hypervisortoolsversion", pvdriver);
}
_userVmDao.saveDetails(userVm);
}
private void ensureVmRunningContext(final long hostId, VMInstanceVO vm, final Event cause) throws OperationTimedoutException, ResourceUnavailableException,
NoTransitionException, InsufficientAddressCapacityException {
final VirtualMachineGuru vmGuru = getVmGuru(vm);
s_logger.debug("VM state is starting on full sync so updating it to running");
vm = _vmDao.findById(vm.getId());
// grab outstanding work item if any
final ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState());
if (work != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Found an outstanding work item for this vm " + vm + " in state:" + vm.getState() + ", work id:" + work.getId());
}
}
try {
stateTransitTo(vm, cause, hostId);
} catch (final NoTransitionException e1) {
s_logger.warn(e1.getMessage());
}
s_logger.debug("VM's " + vm + " state is starting on full sync so updating it to Running");
vm = _vmDao.findById(vm.getId()); // this should ensure vm has the most
// up to date info
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
final List<NicVO> nics = _nicsDao.listByVmId(profile.getId());
for (final NicVO nic : nics) {
final Network network = _networkModel.getNetwork(nic.getNetworkId());
final NicProfile nicProfile =
new NicProfile(nic, network, nic.getBroadcastUri(), nic.getIsolationUri(), null, _networkModel.isSecurityGroupSupportedInNetwork(network),
_networkModel.getNetworkTag(profile.getHypervisorType(), network));
profile.addNic(nicProfile);
}
final Commands cmds = new Commands(Command.OnError.Stop);
s_logger.debug("Finalizing commands that need to be send to complete Start process for the vm " + vm);
if (vmGuru.finalizeCommandsOnStart(cmds, profile)) {
if (cmds.size() != 0) {
_agentMgr.send(vm.getHostId(), cmds);
}
if (vmGuru.finalizeStart(profile, vm.getHostId(), cmds, null)) {
stateTransitTo(vm, cause, vm.getHostId());
} else {
s_logger.error("Unable to finish finialization for running vm: " + vm);
}
} else {
s_logger.error("Unable to finalize commands on start for vm: " + vm);
}
if (work != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Updating outstanding work item to Done, id:" + work.getId());
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
@Override
public boolean isRecurring() {
return true;
}
@Override
public boolean processAnswers(final long agentId, final long seq, final Answer[] answers) {
for (final Answer answer : answers) {
if ( answer instanceof ClusterVMMetaDataSyncAnswer) {
final ClusterVMMetaDataSyncAnswer cvms = (ClusterVMMetaDataSyncAnswer)answer;
if (!cvms.isExecuted()) {
syncVMMetaData(cvms.getVMMetaDatum());
cvms.setExecuted();
}
}
}
return true;
}
@Override
public boolean processTimeout(final long agentId, final long seq) {
return true;
}
@Override
public int getTimeout() {
return -1;
}
@Override
public boolean processCommands(final long agentId, final long seq, final Command[] cmds) {
boolean processed = false;
for (final Command cmd : cmds) {
if (cmd instanceof PingRoutingCommand) {
final PingRoutingCommand ping = (PingRoutingCommand)cmd;
if (ping.getHostVmStateReport() != null) {
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport());
}
// take the chance to scan VMs that are stuck in transitional states
// and are missing from the report
scanStalledVMInTransitionStateOnUpHost(agentId);
processed = true;
}
}
return processed;
}
@Override
public AgentControlAnswer processControlCommand(final long agentId, final AgentControlCommand cmd) {
return null;
}
@Override
public boolean processDisconnect(final long agentId, final Status state) {
return true;
}
@Override
public void processHostAboutToBeRemoved(long hostId) {
}
@Override
public void processHostRemoved(long hostId, long clusterId) {
}
@Override
public void processHostAdded(long hostId) {
}
@Override
public void processConnect(final Host agent, final StartupCommand cmd, final boolean forRebalance) throws ConnectionException {
if (!(cmd instanceof StartupRoutingCommand)) {
return;
}
if(s_logger.isDebugEnabled()) {
s_logger.debug("Received startup command from hypervisor host. host id: " + agent.getId());
}
_syncMgr.resetHostSyncState(agent.getId());
if (forRebalance) {
s_logger.debug("Not processing listener " + this + " as connect happens on rebalance process");
return;
}
final Long clusterId = agent.getClusterId();
final long agentId = agent.getId();
if (agent.getHypervisorType() == HypervisorType.XenServer) { // only for Xen
// initiate the cron job
final ClusterVMMetaDataSyncCommand syncVMMetaDataCmd = new ClusterVMMetaDataSyncCommand(ClusterVMMetaDataSyncInterval.value(), clusterId);
try {
final long seq_no = _agentMgr.send(agentId, new Commands(syncVMMetaDataCmd), this);
s_logger.debug("Cluster VM metadata sync started with jobid " + seq_no);
} catch (final AgentUnavailableException e) {
s_logger.fatal("The Cluster VM metadata sync process failed for cluster id " + clusterId + " with ", e);
}
}
}
protected class TransitionTask extends ManagedContextRunnable {
@Override
protected void runInContext() {
final GlobalLock lock = GlobalLock.getInternLock("TransitionChecking");
if (lock == null) {
s_logger.debug("Couldn't get the global lock");
return;
}
if (!lock.lock(30)) {
s_logger.debug("Couldn't lock the db");
return;
}
try {
scanStalledVMInTransitionStateOnDisconnectedHosts();
final List<VMInstanceVO> instances = _vmDao.findVMInTransition(new Date(DateUtil.currentGMTTime().getTime() - AgentManager.Wait.value() * 1000), State.Starting, State.Stopping);
for (final VMInstanceVO instance : instances) {
final State state = instance.getState();
if (state == State.Stopping) {
_haMgr.scheduleStop(instance, instance.getHostId(), WorkType.CheckStop);
} else if (state == State.Starting) {
_haMgr.scheduleRestart(instance, true);
}
}
} catch (final Exception e) {
s_logger.warn("Caught the following exception on transition checking", e);
} finally {
lock.unlock();
}
}
}
@Override
public VMInstanceVO findById(final long vmId) {
return _vmDao.findById(vmId);
}
@Override
public void checkIfCanUpgrade(final VirtualMachine vmInstance, final ServiceOffering newServiceOffering) {
if (newServiceOffering == null) {
throw new InvalidParameterValueException("Invalid parameter, newServiceOffering can't be null");
}
// Check that the VM is stopped / running
if (!(vmInstance.getState().equals(State.Stopped) || vmInstance.getState().equals(State.Running))) {
s_logger.warn("Unable to upgrade virtual machine " + vmInstance.toString() + " in state " + vmInstance.getState());
throw new InvalidParameterValueException("Unable to upgrade virtual machine " + vmInstance.toString() + " " + " in state " + vmInstance.getState() +
"; make sure the virtual machine is stopped/running");
}
// Check if the service offering being upgraded to is what the VM is already running with
if (!newServiceOffering.isDynamic() && vmInstance.getServiceOfferingId() == newServiceOffering.getId()) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Not upgrading vm " + vmInstance.toString() + " since it already has the requested " + "service offering (" + newServiceOffering.getName() +
")");
}
throw new InvalidParameterValueException("Not upgrading vm " + vmInstance.toString() + " since it already " + "has the requested service offering (" +
newServiceOffering.getName() + ")");
}
final ServiceOfferingVO currentServiceOffering = _offeringDao.findByIdIncludingRemoved(vmInstance.getId(), vmInstance.getServiceOfferingId());
// Check that the service offering being upgraded to has the same Guest IP type as the VM's current service offering
// NOTE: With the new network refactoring in 2.2, we shouldn't need the check for same guest IP type anymore.
/*
* if (!currentServiceOffering.getGuestIpType().equals(newServiceOffering.getGuestIpType())) { String errorMsg =
* "The service offering being upgraded to has a guest IP type: " + newServiceOffering.getGuestIpType(); errorMsg +=
* ". Please select a service offering with the same guest IP type as the VM's current service offering (" +
* currentServiceOffering.getGuestIpType() + ")."; throw new InvalidParameterValueException(errorMsg); }
*/
// Check that the service offering being upgraded to has the same storage pool preference as the VM's current service
// offering
if (currentServiceOffering.getUseLocalStorage() != newServiceOffering.getUseLocalStorage()) {
throw new InvalidParameterValueException("Unable to upgrade virtual machine " + vmInstance.toString() +
", cannot switch between local storage and shared storage service offerings. Current offering " + "useLocalStorage=" +
currentServiceOffering.getUseLocalStorage() + ", target offering useLocalStorage=" + newServiceOffering.getUseLocalStorage());
}
// if vm is a system vm, check if it is a system service offering, if yes return with error as it cannot be used for user vms
if (currentServiceOffering.getSystemUse() != newServiceOffering.getSystemUse()) {
throw new InvalidParameterValueException("isSystem property is different for current service offering and new service offering");
}
// Check that there are enough resources to upgrade the service offering
if (!isVirtualMachineUpgradable(vmInstance, newServiceOffering)) {
throw new InvalidParameterValueException("Unable to upgrade virtual machine, not enough resources available " + "for an offering of " +
newServiceOffering.getCpu() + " cpu(s) at " + newServiceOffering.getSpeed() + " Mhz, and " + newServiceOffering.getRamSize() + " MB of memory");
}
// Check that the service offering being upgraded to has all the tags of the current service offering.
final List<String> currentTags = StringUtils.csvTagsToList(currentServiceOffering.getTags());
final List<String> newTags = StringUtils.csvTagsToList(newServiceOffering.getTags());
if (!newTags.containsAll(currentTags)) {
throw new InvalidParameterValueException("Unable to upgrade virtual machine; the current service offering " + " should have tags as subset of " +
"the new service offering tags. Current service offering tags: " + currentTags + "; " + "new service " + "offering tags: " + newTags);
}
}
@Override
public boolean upgradeVmDb(final long vmId, final long serviceOfferingId) {
final VMInstanceVO vmForUpdate = _vmDao.createForUpdate();
vmForUpdate.setServiceOfferingId(serviceOfferingId);
final ServiceOffering newSvcOff = _entityMgr.findById(ServiceOffering.class, serviceOfferingId);
vmForUpdate.setHaEnabled(newSvcOff.getOfferHA());
vmForUpdate.setLimitCpuUse(newSvcOff.getLimitCpuUse());
vmForUpdate.setServiceOfferingId(newSvcOff.getId());
return _vmDao.update(vmId, vmForUpdate);
}
@Override
public NicProfile addVmToNetwork(final VirtualMachine vm, final Network network, final NicProfile requested)
throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
placeHolder = createPlaceHolderWork(vm.getId());
try {
return orchestrateAddVmToNetwork(vm, network, requested);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = addVmToNetworkThroughJobQueue(vm, network, requested);
try {
outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution exception", e);
}
final Object jobException = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobException != null) {
if (jobException instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobException;
} else if (jobException instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobException;
} else if (jobException instanceof InsufficientCapacityException) {
throw (InsufficientCapacityException)jobException;
} else if (jobException instanceof RuntimeException) {
throw (RuntimeException)jobException;
} else if (jobException instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobException);
} else if (jobException instanceof NicProfile) {
return (NicProfile)jobException;
}
}
throw new RuntimeException("Unexpected job execution result");
}
}
private NicProfile orchestrateAddVmToNetwork(final VirtualMachine vm, final Network network, final NicProfile requested) throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
final CallContext cctx = CallContext.current();
s_logger.debug("Adding vm " + vm + " to network " + network + "; requested nic profile " + requested);
final VMInstanceVO vmVO = _vmDao.findById(vm.getId());
final ReservationContext context = new ReservationContextImpl(null, null, cctx.getCallingUser(), cctx.getCallingAccount());
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vmVO, null, null, null, null);
final DataCenter dc = _entityMgr.findById(DataCenter.class, network.getDataCenterId());
final Host host = _hostDao.findById(vm.getHostId());
final DeployDestination dest = new DeployDestination(dc, null, null, host);
//check vm state
if (vm.getState() == State.Running) {
//1) allocate and prepare nic
final NicProfile nic = _networkMgr.createNicForVm(network, requested, context, vmProfile, true);
//2) Convert vmProfile to vmTO
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vmProfile.getVirtualMachine().getHypervisorType());
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
//3) Convert nicProfile to NicTO
final NicTO nicTO = toNicTO(nic, vmProfile.getVirtualMachine().getHypervisorType());
//4) plug the nic to the vm
s_logger.debug("Plugging nic for vm " + vm + " in network " + network);
boolean result = false;
try {
result = plugNic(network, nicTO, vmTO, context, dest);
if (result) {
s_logger.debug("Nic is plugged successfully for vm " + vm + " in network " + network + ". Vm is a part of network now");
final long isDefault = nic.isDefaultNic() ? 1 : 0;
// insert nic's Id into DB as resource_name
if(VirtualMachine.Type.User.equals(vmVO.getType())) {
//Log usage event for user Vms only
UsageEventUtils.publishUsageEvent(EventTypes.EVENT_NETWORK_OFFERING_ASSIGN, vmVO.getAccountId(), vmVO.getDataCenterId(), vmVO.getId(),
Long.toString(nic.getId()), network.getNetworkOfferingId(), null, isDefault, VirtualMachine.class.getName(), vmVO.getUuid(), vm.isDisplay());
}
return nic;
} else {
s_logger.warn("Failed to plug nic to the vm " + vm + " in network " + network);
return null;
}
} finally {
if (!result) {
s_logger.debug("Removing nic " + nic + " from vm " + vmProfile.getVirtualMachine() + " as nic plug failed on the backend");
_networkMgr.removeNic(vmProfile, _nicsDao.findById(nic.getId()));
}
}
} else if (vm.getState() == State.Stopped) {
//1) allocate nic
return _networkMgr.createNicForVm(network, requested, context, vmProfile, false);
} else {
s_logger.warn("Unable to add vm " + vm + " to network " + network);
throw new ResourceUnavailableException("Unable to add vm " + vm + " to network, is not in the right state", DataCenter.class, vm.getDataCenterId());
}
}
@Override
public NicTO toNicTO(final NicProfile nic, final HypervisorType hypervisorType) {
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(hypervisorType);
final NicTO nicTO = hvGuru.toNicTO(nic);
return nicTO;
}
@Override
public boolean removeNicFromVm(final VirtualMachine vm, final Nic nic)
throws ConcurrentOperationException, ResourceUnavailableException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
placeHolder = createPlaceHolderWork(vm.getId());
try {
return orchestrateRemoveNicFromVm(vm, nic);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = removeNicFromVmThroughJobQueue(vm, nic);
try {
outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
} else if (jobResult instanceof Boolean) {
return (Boolean)jobResult;
}
}
throw new RuntimeException("Job failed with un-handled exception");
}
}
private boolean orchestrateRemoveNicFromVm(final VirtualMachine vm, final Nic nic) throws ConcurrentOperationException, ResourceUnavailableException {
final CallContext cctx = CallContext.current();
final VMInstanceVO vmVO = _vmDao.findById(vm.getId());
final NetworkVO network = _networkDao.findById(nic.getNetworkId());
final ReservationContext context = new ReservationContextImpl(null, null, cctx.getCallingUser(), cctx.getCallingAccount());
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vmVO, null, null, null, null);
final DataCenter dc = _entityMgr.findById(DataCenter.class, network.getDataCenterId());
final Host host = _hostDao.findById(vm.getHostId());
final DeployDestination dest = new DeployDestination(dc, null, null, host);
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vmProfile.getVirtualMachine().getHypervisorType());
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
final NicProfile nicProfile =
new NicProfile(nic, network, nic.getBroadcastUri(), nic.getIsolationUri(), _networkModel.getNetworkRate(network.getId(), vm.getId()),
_networkModel.isSecurityGroupSupportedInNetwork(network), _networkModel.getNetworkTag(vmProfile.getVirtualMachine().getHypervisorType(), network));
//1) Unplug the nic
if (vm.getState() == State.Running) {
final NicTO nicTO = toNicTO(nicProfile, vmProfile.getVirtualMachine().getHypervisorType());
s_logger.debug("Un-plugging nic " + nic + " for vm " + vm + " from network " + network);
final boolean result = unplugNic(network, nicTO, vmTO, context, dest);
if (result) {
s_logger.debug("Nic is unplugged successfully for vm " + vm + " in network " + network);
final long isDefault = nic.isDefaultNic() ? 1 : 0;
UsageEventUtils.publishUsageEvent(EventTypes.EVENT_NETWORK_OFFERING_REMOVE, vm.getAccountId(), vm.getDataCenterId(), vm.getId(),
Long.toString(nic.getId()), network.getNetworkOfferingId(), null, isDefault, VirtualMachine.class.getName(), vm.getUuid(), vm.isDisplay());
} else {
s_logger.warn("Failed to unplug nic for the vm " + vm + " from network " + network);
return false;
}
} else if (vm.getState() != State.Stopped) {
s_logger.warn("Unable to remove vm " + vm + " from network " + network);
throw new ResourceUnavailableException("Unable to remove vm " + vm + " from network, is not in the right state", DataCenter.class, vm.getDataCenterId());
}
//2) Release the nic
_networkMgr.releaseNic(vmProfile, nic);
s_logger.debug("Successfully released nic " + nic + "for vm " + vm);
//3) Remove the nic
_networkMgr.removeNic(vmProfile, nic);
_nicsDao.expunge(nic.getId());
return true;
}
@Override
@DB
public boolean removeVmFromNetwork(final VirtualMachine vm, final Network network, final URI broadcastUri) throws ConcurrentOperationException, ResourceUnavailableException {
// TODO will serialize on the VM object later to resolve operation conflicts
return orchestrateRemoveVmFromNetwork(vm, network, broadcastUri);
}
@DB
private boolean orchestrateRemoveVmFromNetwork(final VirtualMachine vm, final Network network, final URI broadcastUri) throws ConcurrentOperationException, ResourceUnavailableException {
final CallContext cctx = CallContext.current();
final VMInstanceVO vmVO = _vmDao.findById(vm.getId());
final ReservationContext context = new ReservationContextImpl(null, null, cctx.getCallingUser(), cctx.getCallingAccount());
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vmVO, null, null, null, null);
final DataCenter dc = _entityMgr.findById(DataCenter.class, network.getDataCenterId());
final Host host = _hostDao.findById(vm.getHostId());
final DeployDestination dest = new DeployDestination(dc, null, null, host);
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vmProfile.getVirtualMachine().getHypervisorType());
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
Nic nic = null;
if (broadcastUri != null) {
nic = _nicsDao.findByNetworkIdInstanceIdAndBroadcastUri(network.getId(), vm.getId(), broadcastUri.toString());
} else {
nic = _networkModel.getNicInNetwork(vm.getId(), network.getId());
}
if (nic == null) {
s_logger.warn("Could not get a nic with " + network);
return false;
}
// don't delete default NIC on a user VM
if (nic.isDefaultNic() && vm.getType() == VirtualMachine.Type.User) {
s_logger.warn("Failed to remove nic from " + vm + " in " + network + ", nic is default.");
throw new CloudRuntimeException("Failed to remove nic from " + vm + " in " + network + ", nic is default.");
}
//Lock on nic is needed here
final Nic lock = _nicsDao.acquireInLockTable(nic.getId());
if (lock == null) {
//check if nic is still there. Return if it was released already
if (_nicsDao.findById(nic.getId()) == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Not need to remove the vm " + vm + " from network " + network + " as the vm doesn't have nic in this network");
}
return true;
}
throw new ConcurrentOperationException("Unable to lock nic " + nic.getId());
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Lock is acquired for nic id " + lock.getId() + " as a part of remove vm " + vm + " from network " + network);
}
try {
final NicProfile nicProfile =
new NicProfile(nic, network, nic.getBroadcastUri(), nic.getIsolationUri(), _networkModel.getNetworkRate(network.getId(), vm.getId()),
_networkModel.isSecurityGroupSupportedInNetwork(network), _networkModel.getNetworkTag(vmProfile.getVirtualMachine().getHypervisorType(), network));
//1) Unplug the nic
if (vm.getState() == State.Running) {
final NicTO nicTO = toNicTO(nicProfile, vmProfile.getVirtualMachine().getHypervisorType());
s_logger.debug("Un-plugging nic for vm " + vm + " from network " + network);
final boolean result = unplugNic(network, nicTO, vmTO, context, dest);
if (result) {
s_logger.debug("Nic is unplugged successfully for vm " + vm + " in network " + network);
} else {
s_logger.warn("Failed to unplug nic for the vm " + vm + " from network " + network);
return false;
}
} else if (vm.getState() != State.Stopped) {
s_logger.warn("Unable to remove vm " + vm + " from network " + network);
throw new ResourceUnavailableException("Unable to remove vm " + vm + " from network, is not in the right state", DataCenter.class, vm.getDataCenterId());
}
//2) Release the nic
_networkMgr.releaseNic(vmProfile, nic);
s_logger.debug("Successfully released nic " + nic + "for vm " + vm);
//3) Remove the nic
_networkMgr.removeNic(vmProfile, nic);
return true;
} finally {
if (lock != null) {
_nicsDao.releaseFromLockTable(lock.getId());
if (s_logger.isDebugEnabled()) {
s_logger.debug("Lock is released for nic id " + lock.getId() + " as a part of remove vm " + vm + " from network " + network);
}
}
}
}
@Override
public void findHostAndMigrate(final String vmUuid, final Long newSvcOfferingId, final ExcludeList excludes) throws InsufficientCapacityException, ConcurrentOperationException,
ResourceUnavailableException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
if (vm == null) {
throw new CloudRuntimeException("Unable to find " + vmUuid);
}
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
final Long srcHostId = vm.getHostId();
final Long oldSvcOfferingId = vm.getServiceOfferingId();
if (srcHostId == null) {
throw new CloudRuntimeException("Unable to scale the vm because it doesn't have a host id");
}
final Host host = _hostDao.findById(srcHostId);
final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, null, null);
excludes.addHost(vm.getHostId());
vm.setServiceOfferingId(newSvcOfferingId); // Need to find the destination host based on new svc offering
DeployDestination dest = null;
try {
dest = _dpMgr.planDeployment(profile, plan, excludes, null);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
}
if (dest != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(" Found " + dest + " for scaling the vm to.");
}
}
if (dest == null) {
throw new InsufficientServerCapacityException("Unable to find a server to scale the vm to.", host.getClusterId());
}
excludes.addHost(dest.getHost().getId());
try {
migrateForScale(vm.getUuid(), srcHostId, dest, oldSvcOfferingId);
} catch (final ResourceUnavailableException e) {
s_logger.debug("Unable to migrate to unavailable " + dest);
throw e;
} catch (final ConcurrentOperationException e) {
s_logger.debug("Unable to migrate VM due to: " + e.getMessage());
throw e;
}
}
@Override
public void migrateForScale(final String vmUuid, final long srcHostId, final DeployDestination dest, final Long oldSvcOfferingId)
throws ResourceUnavailableException, ConcurrentOperationException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
orchestrateMigrateForScale(vmUuid, srcHostId, dest, oldSvcOfferingId);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = migrateVmForScaleThroughJobQueue(vmUuid, srcHostId, dest, oldSvcOfferingId);
try {
final VirtualMachine vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof RuntimeException) {
throw (RuntimeException)jobResult;
} else if (jobResult instanceof Throwable) {
throw new RuntimeException("Unexpected exception", (Throwable)jobResult);
}
}
}
}
private void orchestrateMigrateForScale(final String vmUuid, final long srcHostId, final DeployDestination dest, final Long oldSvcOfferingId)
throws ResourceUnavailableException, ConcurrentOperationException {
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
s_logger.info("Migrating " + vm + " to " + dest);
vm.getServiceOfferingId();
final long dstHostId = dest.getHost().getId();
final Host fromHost = _hostDao.findById(srcHostId);
if (fromHost == null) {
s_logger.info("Unable to find the host to migrate from: " + srcHostId);
throw new CloudRuntimeException("Unable to find the host to migrate from: " + srcHostId);
}
if (fromHost.getClusterId().longValue() != dest.getCluster().getId()) {
s_logger.info("Source and destination host are not in same cluster, unable to migrate to host: " + dstHostId);
throw new CloudRuntimeException("Source and destination host are not in same cluster, unable to migrate to host: " + dest.getHost().getId());
}
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final long vmId = vm.getId();
vm = _vmDao.findByUuid(vmUuid);
if (vm == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find the vm " + vm);
}
throw new CloudRuntimeException("Unable to find a virtual machine with id " + vmId);
}
if (vm.getState() != State.Running) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM is not Running, unable to migrate the vm " + vm);
}
throw new CloudRuntimeException("VM is not Running, unable to migrate the vm currently " + vm + " , current state: " + vm.getState().toString());
}
AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM_MIGRATE;
if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
} else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
}
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
_networkMgr.prepareNicForMigration(profile, dest);
volumeMgr.prepareForMigration(profile, dest);
final VirtualMachineTO to = toVmTO(profile);
final PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to);
ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId());
work.setStep(Step.Prepare);
work.setResourceType(ItWorkVO.ResourceType.Host);
work.setResourceId(dstHostId);
work = _workDao.persist(work);
Answer pfma = null;
try {
pfma = _agentMgr.send(dstHostId, pfmc);
if (pfma == null || !pfma.getResult()) {
final String details = pfma != null ? pfma.getDetails() : "null answer returned";
final String msg = "Unable to prepare for migration due to " + details;
pfma = null;
throw new AgentUnavailableException(msg, dstHostId);
}
} catch (final OperationTimedoutException e1) {
throw new AgentUnavailableException("Operation timed out", dstHostId);
} finally {
if (pfma == null) {
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
vm.setLastHostId(srcHostId);
try {
if (vm == null || vm.getHostId() == null || vm.getHostId() != srcHostId || !changeState(vm, Event.MigrationRequested, dstHostId, work, Step.Migrating)) {
s_logger.info("Migration cancelled because state has changed: " + vm);
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
}
} catch (final NoTransitionException e1) {
s_logger.info("Migration cancelled because " + e1.getMessage());
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
}
boolean migrated = false;
try {
final boolean isWindows = _guestOsCategoryDao.findById(_guestOsDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
final MigrateCommand mc = new MigrateCommand(vm.getInstanceName(), dest.getHost().getPrivateIpAddress(), isWindows, to, getExecuteInSequence(vm.getHypervisorType()));
String autoConvergence = _configDao.getValue(Config.KvmAutoConvergence.toString());
boolean kvmAutoConvergence = Boolean.parseBoolean(autoConvergence);
mc.setAutoConvergence(kvmAutoConvergence);
mc.setHostGuid(dest.getHost().getGuid());
try {
final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
if (ma == null || !ma.getResult()) {
final String details = ma != null ? ma.getDetails() : "null answer returned";
final String msg = "Unable to migrate due to " + details;
s_logger.error(msg);
throw new CloudRuntimeException(msg);
}
} catch (final OperationTimedoutException e) {
if (e.isActive()) {
s_logger.warn("Active migration command so scheduling a restart for " + vm);
_haMgr.scheduleRestart(vm, true);
}
throw new AgentUnavailableException("Operation timed out on migrating " + vm, dstHostId);
}
try {
final long newServiceOfferingId = vm.getServiceOfferingId();
vm.setServiceOfferingId(oldSvcOfferingId); // release capacity for the old service offering only
if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started)) {
throw new ConcurrentOperationException("Unable to change the state for " + vm);
}
vm.setServiceOfferingId(newServiceOfferingId);
} catch (final NoTransitionException e1) {
throw new ConcurrentOperationException("Unable to change state due to " + e1.getMessage());
}
try {
if (!checkVmOnHost(vm, dstHostId)) {
s_logger.error("Unable to complete migration for " + vm);
try {
_agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null);
} catch (final AgentUnavailableException e) {
s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId);
}
cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true);
throw new CloudRuntimeException("Unable to complete migration for " + vm);
}
} catch (final OperationTimedoutException e) {
s_logger.debug("Error while checking the vm " + vm + " on host " + dstHostId, e);
}
migrated = true;
} finally {
if (!migrated) {
s_logger.info("Migration was unsuccessful. Cleaning up: " + vm);
_alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(),
"Unable to migrate vm " + vm.getInstanceName() + " from host " + fromHost.getName() + " in zone " + dest.getDataCenter().getName() + " and pod " +
dest.getPod().getName(), "Migrate Command failed. Please check logs.");
try {
_agentMgr.send(dstHostId, new Commands(cleanup(vm.getInstanceName())), null);
} catch (final AgentUnavailableException ae) {
s_logger.info("Looks like the destination Host is unavailable for cleanup");
}
try {
stateTransitTo(vm, Event.OperationFailed, srcHostId);
} catch (final NoTransitionException e) {
s_logger.warn(e.getMessage());
}
}
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
}
@Override
public boolean replugNic(final Network network, final NicTO nic, final VirtualMachineTO vm, final ReservationContext context, final DeployDestination dest) throws ConcurrentOperationException,
ResourceUnavailableException, InsufficientCapacityException {
boolean result = true;
final VMInstanceVO router = _vmDao.findById(vm.getId());
if (router.getState() == State.Running) {
try {
final ReplugNicCommand replugNicCmd = new ReplugNicCommand(nic, vm.getName(), vm.getType(), vm.getDetails());
final Commands cmds = new Commands(Command.OnError.Stop);
cmds.addCommand("replugnic", replugNicCmd);
_agentMgr.send(dest.getHost().getId(), cmds);
final ReplugNicAnswer replugNicAnswer = cmds.getAnswer(ReplugNicAnswer.class);
if (replugNicAnswer == null || !replugNicAnswer.getResult()) {
s_logger.warn("Unable to replug nic for vm " + vm.getName());
result = false;
}
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Unable to plug nic for router " + vm.getName() + " in network " + network, dest.getHost().getId(), e);
}
} else {
s_logger.warn("Unable to apply ReplugNic, vm " + router + " is not in the right state " + router.getState());
throw new ResourceUnavailableException("Unable to apply ReplugNic on the backend," + " vm " + vm + " is not in the right state", DataCenter.class,
router.getDataCenterId());
}
return result;
}
public boolean plugNic(final Network network, final NicTO nic, final VirtualMachineTO vm, final ReservationContext context, final DeployDestination dest) throws ConcurrentOperationException,
ResourceUnavailableException, InsufficientCapacityException {
boolean result = true;
final VMInstanceVO router = _vmDao.findById(vm.getId());
if (router.getState() == State.Running) {
try {
final PlugNicCommand plugNicCmd = new PlugNicCommand(nic, vm.getName(), vm.getType(), vm.getDetails());
final Commands cmds = new Commands(Command.OnError.Stop);
cmds.addCommand("plugnic", plugNicCmd);
_agentMgr.send(dest.getHost().getId(), cmds);
final PlugNicAnswer plugNicAnswer = cmds.getAnswer(PlugNicAnswer.class);
if (plugNicAnswer == null || !plugNicAnswer.getResult()) {
s_logger.warn("Unable to plug nic for vm " + vm.getName());
result = false;
}
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Unable to plug nic for router " + vm.getName() + " in network " + network, dest.getHost().getId(), e);
}
} else {
s_logger.warn("Unable to apply PlugNic, vm " + router + " is not in the right state " + router.getState());
throw new ResourceUnavailableException("Unable to apply PlugNic on the backend," + " vm " + vm + " is not in the right state", DataCenter.class,
router.getDataCenterId());
}
return result;
}
public boolean unplugNic(final Network network, final NicTO nic, final VirtualMachineTO vm, final ReservationContext context, final DeployDestination dest) throws ConcurrentOperationException,
ResourceUnavailableException {
boolean result = true;
final VMInstanceVO router = _vmDao.findById(vm.getId());
if (router.getState() == State.Running) {
// collect vm network statistics before unplug a nic
UserVmVO userVm = _userVmDao.findById(vm.getId());
if (userVm != null && userVm.getType() == VirtualMachine.Type.User) {
_userVmService.collectVmNetworkStatistics(userVm);
}
try {
final Commands cmds = new Commands(Command.OnError.Stop);
final UnPlugNicCommand unplugNicCmd = new UnPlugNicCommand(nic, vm.getName());
cmds.addCommand("unplugnic", unplugNicCmd);
_agentMgr.send(dest.getHost().getId(), cmds);
final UnPlugNicAnswer unplugNicAnswer = cmds.getAnswer(UnPlugNicAnswer.class);
if (unplugNicAnswer == null || !unplugNicAnswer.getResult()) {
s_logger.warn("Unable to unplug nic from router " + router);
result = false;
}
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Unable to unplug nic from rotuer " + router + " from network " + network, dest.getHost().getId(), e);
}
} else if (router.getState() == State.Stopped || router.getState() == State.Stopping) {
s_logger.debug("Vm " + router.getInstanceName() + " is in " + router.getState() + ", so not sending unplug nic command to the backend");
} else {
s_logger.warn("Unable to apply unplug nic, Vm " + router + " is not in the right state " + router.getState());
throw new ResourceUnavailableException("Unable to apply unplug nic on the backend," + " vm " + router + " is not in the right state", DataCenter.class,
router.getDataCenterId());
}
return result;
}
@Override
public VMInstanceVO reConfigureVm(final String vmUuid, final ServiceOffering oldServiceOffering,
final boolean reconfiguringOnExistingHost)
throws ResourceUnavailableException, InsufficientServerCapacityException, ConcurrentOperationException {
final AsyncJobExecutionContext jobContext = AsyncJobExecutionContext.getCurrentExecutionContext();
if (jobContext.isJobDispatchedBy(VmWorkConstants.VM_WORK_JOB_DISPATCHER)) {
// avoid re-entrance
VmWorkJobVO placeHolder = null;
final VirtualMachine vm = _vmDao.findByUuid(vmUuid);
placeHolder = createPlaceHolderWork(vm.getId());
try {
return orchestrateReConfigureVm(vmUuid, oldServiceOffering, reconfiguringOnExistingHost);
} finally {
if (placeHolder != null) {
_workJobDao.expunge(placeHolder.getId());
}
}
} else {
final Outcome<VirtualMachine> outcome = reconfigureVmThroughJobQueue(vmUuid, oldServiceOffering, reconfiguringOnExistingHost);
VirtualMachine vm = null;
try {
vm = outcome.get();
} catch (final InterruptedException e) {
throw new RuntimeException("Operation is interrupted", e);
} catch (final java.util.concurrent.ExecutionException e) {
throw new RuntimeException("Execution excetion", e);
}
final Object jobResult = _jobMgr.unmarshallResultObject(outcome.getJob());
if (jobResult != null) {
if (jobResult instanceof ResourceUnavailableException) {
throw (ResourceUnavailableException)jobResult;
} else if (jobResult instanceof ConcurrentOperationException) {
throw (ConcurrentOperationException)jobResult;
} else if (jobResult instanceof InsufficientServerCapacityException) {
throw (InsufficientServerCapacityException)jobResult;
} else if (jobResult instanceof Throwable) {
s_logger.error("Unhandled exception", (Throwable)jobResult);
throw new RuntimeException("Unhandled exception", (Throwable)jobResult);
}
}
return (VMInstanceVO)vm;
}
}
private VMInstanceVO orchestrateReConfigureVm(final String vmUuid, final ServiceOffering oldServiceOffering, final boolean reconfiguringOnExistingHost) throws ResourceUnavailableException,
ConcurrentOperationException {
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final long newServiceofferingId = vm.getServiceOfferingId();
final ServiceOffering newServiceOffering = _offeringDao.findById(vm.getId(), newServiceofferingId);
final HostVO hostVo = _hostDao.findById(vm.getHostId());
final Float memoryOvercommitRatio = CapacityManager.MemOverprovisioningFactor.valueIn(hostVo.getClusterId());
final Float cpuOvercommitRatio = CapacityManager.CpuOverprovisioningFactor.valueIn(hostVo.getClusterId());
final long minMemory = (long)(newServiceOffering.getRamSize() / memoryOvercommitRatio);
final ScaleVmCommand reconfigureCmd =
new ScaleVmCommand(vm.getInstanceName(), newServiceOffering.getCpu(), (int)(newServiceOffering.getSpeed() / cpuOvercommitRatio),
newServiceOffering.getSpeed(), minMemory * 1024L * 1024L, newServiceOffering.getRamSize() * 1024L * 1024L, newServiceOffering.getLimitCpuUse());
final Long dstHostId = vm.getHostId();
if(vm.getHypervisorType().equals(HypervisorType.VMware)) {
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
Map<String, String> details = null;
details = hvGuru.getClusterSettings(vm.getId());
reconfigureCmd.getVirtualMachine().setDetails(details);
}
final ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Running, vm.getType(), vm.getId());
work.setStep(Step.Prepare);
work.setResourceType(ItWorkVO.ResourceType.Host);
work.setResourceId(vm.getHostId());
_workDao.persist(work);
boolean success = false;
try {
if (reconfiguringOnExistingHost) {
vm.setServiceOfferingId(oldServiceOffering.getId());
_capacityMgr.releaseVmCapacity(vm, false, false, vm.getHostId()); //release the old capacity
vm.setServiceOfferingId(newServiceofferingId);
_capacityMgr.allocateVmCapacity(vm, false); // lock the new capacity
}
final Answer reconfigureAnswer = _agentMgr.send(vm.getHostId(), reconfigureCmd);
if (reconfigureAnswer == null || !reconfigureAnswer.getResult()) {
s_logger.error("Unable to scale vm due to " + (reconfigureAnswer == null ? "" : reconfigureAnswer.getDetails()));
throw new CloudRuntimeException("Unable to scale vm due to " + (reconfigureAnswer == null ? "" : reconfigureAnswer.getDetails()));
}
success = true;
} catch (final OperationTimedoutException e) {
throw new AgentUnavailableException("Operation timed out on reconfiguring " + vm, dstHostId);
} catch (final AgentUnavailableException e) {
throw e;
} finally {
if (!success) {
_capacityMgr.releaseVmCapacity(vm, false, false, vm.getHostId()); // release the new capacity
vm.setServiceOfferingId(oldServiceOffering.getId());
_capacityMgr.allocateVmCapacity(vm, false); // allocate the old capacity
}
}
return vm;
}
@Override
public String getConfigComponentName() {
return VirtualMachineManager.class.getSimpleName();
}
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] {ClusterDeltaSyncInterval, StartRetry, VmDestroyForcestop, VmOpCancelInterval, VmOpCleanupInterval, VmOpCleanupWait,
VmOpLockStateRetry,
VmOpWaitInterval, ExecuteInSequence, VmJobCheckInterval, VmJobTimeout, VmJobStateReportInterval, VmConfigDriveLabel, VmConfigDriveOnPrimaryPool, HaVmRestartHostUp};
}
public List<StoragePoolAllocator> getStoragePoolAllocators() {
return _storagePoolAllocators;
}
@Inject
public void setStoragePoolAllocators(final List<StoragePoolAllocator> storagePoolAllocators) {
_storagePoolAllocators = storagePoolAllocators;
}
//
// PowerState report handling for out-of-band changes and handling of left-over transitional VM states
//
@MessageHandler(topic = Topics.VM_POWER_STATE)
protected void HandlePowerStateReport(final String subject, final String senderAddress, final Object args) {
assert args != null;
final Long vmId = (Long)args;
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vmId);
if (pendingWorkJobs.size() == 0 && !_haMgr.hasPendingHaWork(vmId)) {
// there is no pending operation job
final VMInstanceVO vm = _vmDao.findById(vmId);
if (vm != null) {
switch (vm.getPowerState()) {
case PowerOn:
handlePowerOnReportWithNoPendingJobsOnVM(vm);
break;
case PowerOff:
case PowerReportMissing:
handlePowerOffReportWithNoPendingJobsOnVM(vm);
break;
// PowerUnknown shouldn't be reported, it is a derived
// VM power state from host state (host un-reachable)
case PowerUnknown:
default:
assert false;
break;
}
} else {
s_logger.warn("VM " + vmId + " no longer exists when processing VM state report");
}
} else {
s_logger.info("There is pending job or HA tasks working on the VM. vm id: " + vmId + ", postpone power-change report by resetting power-change counters");
// reset VM power state tracking so that we won't lost signal when VM has
// been translated to
_vmDao.resetVmPowerStateTracking(vmId);
}
}
private void handlePowerOnReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
//
// 1) handle left-over transitional VM states
// 2) handle out of band VM live migration
// 3) handle out of sync stationary states, marking VM from Stopped to Running with
// alert messages
//
switch (vm.getState()) {
case Starting:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it");
try {
stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
} catch (final NoTransitionException e) {
s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
}
s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor");
// we need to alert admin or user about this risky state transition
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName()
+ ") state is sync-ed (Starting -> Running) from out-of-context transition. VM network environment may need to be reset");
break;
case Running:
try {
if (vm.getHostId() != null && vm.getHostId().longValue() != vm.getPowerHostId().longValue()) {
s_logger.info("Detected out of band VM migration from host " + vm.getHostId() + " to host " + vm.getPowerHostId());
}
stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
} catch (final NoTransitionException e) {
s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
}
break;
case Stopping:
case Stopped:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it");
try {
stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
} catch (final NoTransitionException e) {
s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
}
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState()
+ " -> Running) from out-of-context transition. VM network environment may need to be reset");
s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor");
break;
case Destroyed:
case Expunging:
s_logger.info("Receive power on report when VM is in destroyed or expunging state. vm: "
+ vm.getId() + ", state: " + vm.getState());
break;
case Migrating:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it");
try {
stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
} catch (final NoTransitionException e) {
s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
}
s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor");
break;
case Error:
default:
s_logger.info("Receive power on report when VM is in error or unexpected state. vm: "
+ vm.getId() + ", state: " + vm.getState());
break;
}
}
private void handlePowerOffReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
// 1) handle left-over transitional VM states
// 2) handle out of sync stationary states, schedule force-stop to release resources
//
switch (vm.getState()) {
case Starting:
case Stopping:
case Running:
case Stopped:
case Migrating:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it");
if(vm.isHaEnabled() && vm.getState() == State.Running && HaVmRestartHostUp.value() && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) {
s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart");
if(!_haMgr.hasPendingHaWork(vm.getId())) {
_haMgr.scheduleRestart(vm, true);
} else {
s_logger.info("VM " + vm.getInstanceName() + " already has an pending HA task working on it");
}
return;
}
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
if (!sendStop(vmGuru, profile, true, true)) {
// In case StopCommand fails, don't proceed further
return;
}
try {
stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOffReport, null);
} catch (final NoTransitionException e) {
s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
}
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState()
+ " -> Stopped) from out-of-context transition.");
s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Stopped state according to power-off report from hypervisor");
break;
case Destroyed:
case Expunging:
break;
case Error:
default:
break;
}
}
private void scanStalledVMInTransitionStateOnUpHost(final long hostId) {
//
// Check VM that is stuck in Starting, Stopping, Migrating states, we won't check
// VMs in expunging state (this need to be handled specially)
//
// checking condition
// 1) no pending VmWork job
// 2) on hostId host and host is UP
//
// When host is UP, soon or later we will get a report from the host about the VM,
// however, if VM is missing from the host report (it may happen in out of band changes
// or from designed behave of XS/KVM), the VM may not get a chance to run the state-sync logic
//
// Therefore, we will scan thoses VMs on UP host based on last update timestamp, if the host is UP
// and a VM stalls for status update, we will consider them to be powered off
// (which is relatively safe to do so)
final long stallThresholdInMs = VmJobStateReportInterval.value() + (VmJobStateReportInterval.value() >> 1);
final Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - stallThresholdInMs);
final List<Long> mostlikelyStoppedVMs = listStalledVMInTransitionStateOnUpHost(hostId, cutTime);
for (final Long vmId : mostlikelyStoppedVMs) {
final VMInstanceVO vm = _vmDao.findById(vmId);
assert vm != null;
handlePowerOffReportWithNoPendingJobsOnVM(vm);
}
final List<Long> vmsWithRecentReport = listVMInTransitionStateWithRecentReportOnUpHost(hostId, cutTime);
for (final Long vmId : vmsWithRecentReport) {
final VMInstanceVO vm = _vmDao.findById(vmId);
assert vm != null;
if (vm.getPowerState() == PowerState.PowerOn) {
handlePowerOnReportWithNoPendingJobsOnVM(vm);
} else {
handlePowerOffReportWithNoPendingJobsOnVM(vm);
}
}
}
private void scanStalledVMInTransitionStateOnDisconnectedHosts() {
final Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - VmOpWaitInterval.value() * 1000);
final List<Long> stuckAndUncontrollableVMs = listStalledVMInTransitionStateOnDisconnectedHosts(cutTime);
for (final Long vmId : stuckAndUncontrollableVMs) {
final VMInstanceVO vm = _vmDao.findById(vmId);
// We now only alert administrator about this situation
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") is stuck in " + vm.getState()
+ " state and its host is unreachable for too long");
}
}
// VMs that in transitional state without recent power state report
private List<Long> listStalledVMInTransitionStateOnUpHost(final long hostId, final Date cutTime) {
final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " +
"AND h.id = ? AND i.power_state_update_time < ? AND i.host_id = h.id " +
"AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
"AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" +
"AND i.removed IS NULL";
final List<Long> l = new ArrayList<Long>();
TransactionLegacy txn = null;
try {
txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB);
PreparedStatement pstmt = null;
try {
pstmt = txn.prepareAutoCloseStatement(sql);
pstmt.setLong(1, hostId);
pstmt.setString(2, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
pstmt.setInt(3, JobInfo.Status.IN_PROGRESS.ordinal());
final ResultSet rs = pstmt.executeQuery();
while (rs.next()) {
l.add(rs.getLong(1));
}
} catch (final SQLException e) {
} catch (final Throwable e) {
}
} finally {
if (txn != null) {
txn.close();
}
}
return l;
}
// VMs that in transitional state and recently have power state update
private List<Long> listVMInTransitionStateWithRecentReportOnUpHost(final long hostId, final Date cutTime) {
final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " +
"AND h.id = ? AND i.power_state_update_time > ? AND i.host_id = h.id " +
"AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
"AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" +
"AND i.removed IS NULL";
final List<Long> l = new ArrayList<Long>();
TransactionLegacy txn = null;
try {
txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB);
PreparedStatement pstmt = null;
try {
pstmt = txn.prepareAutoCloseStatement(sql);
pstmt.setLong(1, hostId);
pstmt.setString(2, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
pstmt.setInt(3, JobInfo.Status.IN_PROGRESS.ordinal());
final ResultSet rs = pstmt.executeQuery();
while (rs.next()) {
l.add(rs.getLong(1));
}
} catch (final SQLException e) {
} catch (final Throwable e) {
}
return l;
} finally {
if (txn != null) {
txn.close();
}
}
}
private List<Long> listStalledVMInTransitionStateOnDisconnectedHosts(final Date cutTime) {
final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status != 'UP' " +
"AND i.power_state_update_time < ? AND i.host_id = h.id " +
"AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
"AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" +
"AND i.removed IS NULL";
final List<Long> l = new ArrayList<Long>();
TransactionLegacy txn = null;
try {
txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB);
PreparedStatement pstmt = null;
try {
pstmt = txn.prepareAutoCloseStatement(sql);
pstmt.setString(1, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
pstmt.setInt(2, JobInfo.Status.IN_PROGRESS.ordinal());
final ResultSet rs = pstmt.executeQuery();
while (rs.next()) {
l.add(rs.getLong(1));
}
} catch (final SQLException e) {
} catch (final Throwable e) {
}
return l;
} finally {
if (txn != null) {
txn.close();
}
}
}
//
// VM operation based on new sync model
//
public class VmStateSyncOutcome extends OutcomeImpl<VirtualMachine> {
private long _vmId;
public VmStateSyncOutcome(final AsyncJob job, final PowerState desiredPowerState, final long vmId, final Long srcHostIdForMigration) {
super(VirtualMachine.class, job, VmJobCheckInterval.value(), new Predicate() {
@Override
public boolean checkCondition() {
final AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, job.getId());
assert jobVo != null;
if (jobVo == null || jobVo.getStatus() != JobInfo.Status.IN_PROGRESS) {
return true;
}
return false;
}
}, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE);
_vmId = vmId;
}
@Override
protected VirtualMachine retrieve() {
return _vmDao.findById(_vmId);
}
}
public class VmJobVirtualMachineOutcome extends OutcomeImpl<VirtualMachine> {
private long _vmId;
public VmJobVirtualMachineOutcome(final AsyncJob job, final long vmId) {
super(VirtualMachine.class, job, VmJobCheckInterval.value(), new Predicate() {
@Override
public boolean checkCondition() {
final AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, job.getId());
assert jobVo != null;
if (jobVo == null || jobVo.getStatus() != JobInfo.Status.IN_PROGRESS) {
return true;
}
return false;
}
}, AsyncJob.Topics.JOB_STATE);
_vmId = vmId;
}
@Override
protected VirtualMachine retrieve() {
return _vmDao.findById(_vmId);
}
}
//
// TODO build a common pattern to reduce code duplication in following methods
// no time for this at current iteration
//
public Outcome<VirtualMachine> startVmThroughJobQueue(final String vmUuid,
final Map<VirtualMachineProfile.Param, Object> params,
final DeploymentPlan planToDeploy, final DeploymentPlanner planner) {
final CallContext context = CallContext.current();
final User callingUser = context.getCallingUser();
final Account callingAccount = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
VmWorkJobVO workJob = null;
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(VirtualMachine.Type.Instance,
vm.getId(), VmWorkStart.class.getName());
if (pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkStart.class.getName());
workJob.setAccountId(callingAccount.getId());
workJob.setUserId(callingUser.getId());
workJob.setStep(VmWorkJobVO.Step.Starting);
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkStart workInfo = new VmWorkStart(callingUser.getId(), callingAccount.getId(), vm.getId(), VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER);
workInfo.setPlan(planToDeploy);
workInfo.setParams(params);
if (planner != null) {
workInfo.setDeploymentPlanner(planner.getName());
}
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmStateSyncOutcome(workJob,
VirtualMachine.PowerState.PowerOn, vm.getId(), null);
}
public Outcome<VirtualMachine> stopVmThroughJobQueue(final String vmUuid, final boolean cleanup) {
final CallContext context = CallContext.current();
final Account account = context.getCallingAccount();
final User user = context.getCallingUser();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
vm.getType(), vm.getId(),
VmWorkStop.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkStop.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setStep(VmWorkJobVO.Step.Prepare);
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkStop workInfo = new VmWorkStop(user.getId(), account.getId(), vm.getId(), VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, cleanup);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmStateSyncOutcome(workJob,
VirtualMachine.PowerState.PowerOff, vm.getId(), null);
}
public Outcome<VirtualMachine> rebootVmThroughJobQueue(final String vmUuid,
final Map<VirtualMachineProfile.Param, Object> params) {
final CallContext context = CallContext.current();
final Account account = context.getCallingAccount();
final User user = context.getCallingUser();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkReboot.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkReboot.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setStep(VmWorkJobVO.Step.Prepare);
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkReboot workInfo = new VmWorkReboot(user.getId(), account.getId(), vm.getId(), VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, params);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob,
vm.getId());
}
public Outcome<VirtualMachine> migrateVmThroughJobQueue(final String vmUuid, final long srcHostId, final DeployDestination dest) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkMigrate.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkMigrate.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkMigrate workInfo = new VmWorkMigrate(user.getId(), account.getId(), vm.getId(), VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, srcHostId, dest);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmStateSyncOutcome(workJob,
VirtualMachine.PowerState.PowerOn, vm.getId(), vm.getPowerHostId());
}
public Outcome<VirtualMachine> migrateVmAwayThroughJobQueue(final String vmUuid, final long srcHostId) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkMigrateAway.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkMigrateAway.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkMigrateAway workInfo = new VmWorkMigrateAway(user.getId(), account.getId(), vm.getId(), VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, srcHostId);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
}
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmStateSyncOutcome(workJob, VirtualMachine.PowerState.PowerOn, vm.getId(), vm.getPowerHostId());
}
public Outcome<VirtualMachine> migrateVmWithStorageThroughJobQueue(
final String vmUuid, final long srcHostId, final long destHostId,
final Map<Long, Long> volumeToPool) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkMigrateWithStorage.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkMigrateWithStorage.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkMigrateWithStorage workInfo = new VmWorkMigrateWithStorage(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, srcHostId, destHostId, volumeToPool);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmStateSyncOutcome(workJob,
VirtualMachine.PowerState.PowerOn, vm.getId(), destHostId);
}
public Outcome<VirtualMachine> migrateVmForScaleThroughJobQueue(
final String vmUuid, final long srcHostId, final DeployDestination dest, final Long newSvcOfferingId) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkMigrateForScale.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkMigrateForScale.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkMigrateForScale workInfo = new VmWorkMigrateForScale(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, srcHostId, dest, newSvcOfferingId);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
public Outcome<VirtualMachine> migrateVmStorageThroughJobQueue(
final String vmUuid, final StoragePool destPool) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkStorageMigration.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkStorageMigration.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkStorageMigration workInfo = new VmWorkStorageMigration(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, destPool.getId());
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
public Outcome<VirtualMachine> addVmToNetworkThroughJobQueue(
final VirtualMachine vm, final Network network, final NicProfile requested) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkAddVmToNetwork.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkAddVmToNetwork.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkAddVmToNetwork workInfo = new VmWorkAddVmToNetwork(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, network.getId(), requested);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
public Outcome<VirtualMachine> removeNicFromVmThroughJobQueue(
final VirtualMachine vm, final Nic nic) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkRemoveNicFromVm.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkRemoveNicFromVm.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkRemoveNicFromVm workInfo = new VmWorkRemoveNicFromVm(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, nic.getId());
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
public Outcome<VirtualMachine> removeVmFromNetworkThroughJobQueue(
final VirtualMachine vm, final Network network, final URI broadcastUri) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkRemoveVmFromNetwork.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkRemoveVmFromNetwork.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkRemoveVmFromNetwork workInfo = new VmWorkRemoveVmFromNetwork(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, network, broadcastUri);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
public Outcome<VirtualMachine> reconfigureVmThroughJobQueue(
final String vmUuid, final ServiceOffering newServiceOffering, final boolean reconfiguringOnExistingHost) {
final CallContext context = CallContext.current();
final User user = context.getCallingUser();
final Account account = context.getCallingAccount();
final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
VirtualMachine.Type.Instance, vm.getId(),
VmWorkReconfigure.class.getName());
VmWorkJobVO workJob = null;
if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) {
assert pendingWorkJobs.size() == 1;
workJob = pendingWorkJobs.get(0);
} else {
workJob = new VmWorkJobVO(context.getContextId());
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_DISPATCHER);
workJob.setCmd(VmWorkReconfigure.class.getName());
workJob.setAccountId(account.getId());
workJob.setUserId(user.getId());
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(vm.getId());
workJob.setRelated(AsyncJobExecutionContext.getOriginJobId());
// save work context info (there are some duplications)
final VmWorkReconfigure workInfo = new VmWorkReconfigure(user.getId(), account.getId(), vm.getId(),
VirtualMachineManagerImpl.VM_WORK_JOB_HANDLER, newServiceOffering.getId(), reconfiguringOnExistingHost);
workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo));
_jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vm.getId());
}
AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId());
return new VmJobVirtualMachineOutcome(workJob, vm.getId());
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateStart(final VmWorkStart work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
try{
orchestrateStart(vm.getUuid(), work.getParams(), work.getPlan(), _dpMgr.getDeploymentPlannerByName(work.getDeploymentPlanner()));
}
catch (CloudRuntimeException e){
e.printStackTrace();
s_logger.info("Caught CloudRuntimeException, returning job failed " + e);
CloudRuntimeException ex = new CloudRuntimeException("Unable to start VM instance");
return new Pair<JobInfo.Status, String>(JobInfo.Status.FAILED, JobSerializerHelper.toObjectSerializedString(ex));
}
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateStop(final VmWorkStop work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
throw new CloudRuntimeException("Unable to find VM id=" + work.getVmId());
}
orchestrateStop(vm.getUuid(), work.isCleanup());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateMigrate(final VmWorkMigrate work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
orchestrateMigrate(vm.getUuid(), work.getSrcHostId(), work.getDeployDestination());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateMigrateAway(final VmWorkMigrateAway work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
try {
orchestrateMigrateAway(vm.getUuid(), work.getSrcHostId(), null);
} catch (final InsufficientServerCapacityException e) {
s_logger.warn("Failed to deploy vm " + vm.getId() + " with original planner, sending HAPlanner");
orchestrateMigrateAway(vm.getUuid(), work.getSrcHostId(), _haMgr.getHAPlanner());
}
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateMigrateWithStorage(final VmWorkMigrateWithStorage work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
orchestrateMigrateWithStorage(vm.getUuid(),
work.getSrcHostId(),
work.getDestHostId(),
work.getVolumeToPool());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateMigrateForScale(final VmWorkMigrateForScale work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
orchestrateMigrateForScale(vm.getUuid(),
work.getSrcHostId(),
work.getDeployDestination(),
work.getNewServiceOfferringId());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateReboot(final VmWorkReboot work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
orchestrateReboot(vm.getUuid(), work.getParams());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateAddVmToNetwork(final VmWorkAddVmToNetwork work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
final Network network = _networkDao.findById(work.getNetworkId());
final NicProfile nic = orchestrateAddVmToNetwork(vm, network,
work.getRequestedNicProfile());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, _jobMgr.marshallResultObject(nic));
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateRemoveNicFromVm(final VmWorkRemoveNicFromVm work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
final NicVO nic = _entityMgr.findById(NicVO.class, work.getNicId());
final boolean result = orchestrateRemoveNicFromVm(vm, nic);
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED,
_jobMgr.marshallResultObject(result));
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateRemoveVmFromNetwork(final VmWorkRemoveVmFromNetwork work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
final boolean result = orchestrateRemoveVmFromNetwork(vm,
work.getNetwork(), work.getBroadcastUri());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED,
_jobMgr.marshallResultObject(result));
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateReconfigure(final VmWorkReconfigure work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
final ServiceOffering newServiceOffering = _offeringDao.findById(vm.getId(), work.getNewServiceOfferingId());
reConfigureVm(vm.getUuid(), newServiceOffering,
work.isSameHost());
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@ReflectionUse
private Pair<JobInfo.Status, String> orchestrateStorageMigration(final VmWorkStorageMigration work) throws Exception {
final VMInstanceVO vm = _entityMgr.findById(VMInstanceVO.class, work.getVmId());
if (vm == null) {
s_logger.info("Unable to find vm " + work.getVmId());
}
assert vm != null;
final StoragePool pool = (PrimaryDataStoreInfo)dataStoreMgr.getPrimaryDataStore(work.getDestStoragePoolId());
orchestrateStorageMigration(vm.getUuid(), pool);
return new Pair<JobInfo.Status, String>(JobInfo.Status.SUCCEEDED, null);
}
@Override
public Pair<JobInfo.Status, String> handleVmWorkJob(final VmWork work) throws Exception {
return _jobHandlerProxy.handleVmWorkJob(work);
}
private VmWorkJobVO createPlaceHolderWork(final long instanceId) {
final VmWorkJobVO workJob = new VmWorkJobVO("");
workJob.setDispatcher(VmWorkConstants.VM_WORK_JOB_PLACEHOLDER);
workJob.setCmd("");
workJob.setCmdInfo("");
workJob.setAccountId(0);
workJob.setUserId(0);
workJob.setStep(VmWorkJobVO.Step.Starting);
workJob.setVmType(VirtualMachine.Type.Instance);
workJob.setVmInstanceId(instanceId);
workJob.setInitMsid(ManagementServerNode.getManagementServerId());
_workJobDao.persist(workJob);
return workJob;
}
}