/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.cloud.storage;
import java.util.List;
import java.util.Map;
import javax.inject.Inject;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProviderManager;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.springframework.stereotype.Component;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.ModifyStoragePoolAnswer;
import com.cloud.agent.api.ModifyStoragePoolCommand;
import com.cloud.alert.AlertManager;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceManager;
import com.cloud.server.ManagementServer;
import com.cloud.storage.dao.StoragePoolHostDao;
import com.cloud.storage.dao.StoragePoolWorkDao;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.user.dao.UserDao;
import com.cloud.utils.Pair;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.VirtualMachineManager;
import com.cloud.vm.dao.ConsoleProxyDao;
import com.cloud.vm.dao.DomainRouterDao;
import com.cloud.vm.dao.SecondaryStorageVmDao;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.VMInstanceDao;
@Component
public class StoragePoolAutomationImpl implements StoragePoolAutomation {
protected Logger logger = LogManager.getLogger(getClass());
@Inject
protected VirtualMachineManager vmMgr;
@Inject
protected SecondaryStorageVmDao _secStrgDao;
@Inject
UserVmDao userVmDao;
@Inject
protected UserDao _userDao;
@Inject
protected DomainRouterDao _domrDao;
@Inject
protected StoragePoolHostDao _storagePoolHostDao;
@Inject
protected AlertManager _alertMgr;
@Inject
protected ConsoleProxyDao _consoleProxyDao;
@Inject
protected StoragePoolWorkDao _storagePoolWorkDao;
@Inject
PrimaryDataStoreDao primaryDataStoreDao;
@Inject
StoragePoolDetailsDao storagePoolDetailsDao;
@Inject
DataStoreManager dataStoreMgr;
@Inject
protected ResourceManager _resourceMgr;
@Inject
AgentManager agentMgr;
@Inject
VolumeDao volumeDao;
@Inject
VMInstanceDao vmDao;
@Inject
ManagementServer server;
@Inject
DataStoreProviderManager providerMgr;
@Inject
StorageManager storageManager;
@Override
public boolean maintain(DataStore store) {
return maintain(store, null);
}
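/**
 * Puts the given primary storage pool into maintenance mode: the storage
 * heartbeat is removed from the pool's hosts, a work item is enqueued for every
 * VM whose volumes live on the pool, and those VMs are then stopped (and, for
 * system VMs with other pools available, restarted elsewhere).
 *
 * @param store the primary data store to put into maintenance
 * @param details optional key/value details for the operation
 * @return true if maintenance was initiated successfully
 */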
@Override
public boolean maintain(DataStore store, Map<String, String> details) {
StoragePoolVO pool = primaryDataStoreDao.findById(store.getId());
try {
getStoragePoolForSpecification(pool);
List<HostVO> hosts = getHostsForStoragePool(pool);
if (setNextStateForMaintenance(hosts, pool) == StoragePoolStatus.PrepareForMaintenance) {
removeHeartbeatForHostsFromPool(hosts, pool);
// 1. Check whether other primary storage pools exist in the zone;
//    if they do, the system VMs can be migrated over to them,
//    if they don't, all VMs on this pool are simply stopped
List<StoragePoolVO> upPools = primaryDataStoreDao.listByStatusInZone(pool.getDataCenterId(), StoragePoolStatus.Up);
boolean restart = !CollectionUtils.isEmpty(upPools);
// 2. Get a list of all the volumes within this storage pool
List<VolumeVO> allVolumes = volumeDao.findByPoolId(pool.getId());
// 3. Enqueue a work item for each VM that uses those volumes
enqueueMigrationsForVolumes(allVolumes, pool);
// 4. Process the queue
processMigrationWorkloads(pool, restart);
}
} catch (Exception e) {
logger.error("Exception in enabling primary storage maintenance:", e);
pool.setStatus(StoragePoolStatus.ErrorInMaintenance);
primaryDataStoreDao.update(pool.getId(), pool);
// TODO decide on what recovery is possible
throw new CloudRuntimeException(e.getMessage(), e);
}
return true;
}
@Override
public boolean cancelMaintain(DataStore store) {
return cancelMaintain(store, null);
}
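/**
 * Cancels maintenance on the given primary storage pool: the storage heartbeat
 * is re-added on the pool's hosts and the VMs that were stopped when
 * maintenance was prepared are started again.
 *
 * @param store the primary data store to take out of maintenance
 * @param details optional key/value details for the operation
 * @return true on success
 */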
@Override
public boolean cancelMaintain(DataStore store, Map<String, String> details) {
// Restore the heartbeat on the pool's hosts and restart the VMs that were stopped for maintenance
StoragePoolVO poolVO = primaryDataStoreDao.findById(store.getId());
StoragePool pool = (StoragePool)store;
List<HostVO> hosts = getHostsForStoragePool(poolVO);
if (CollectionUtils.isEmpty(hosts)) {
return true;
}
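// 1. Re-add the storage pool heartbeat on all connected hosts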
Pair<Map<String, String>, Boolean> nfsMountOpts = storageManager.getStoragePoolNFSMountOpts(pool, null);
addHeartbeatToHostsInPool(hosts, pool, nfsMountOpts);
// 2. Get a list of pending work for this queue
List<StoragePoolWorkVO> pendingWork = _storagePoolWorkDao.listPendingWorkForCancelMaintenanceByPoolId(poolVO.getId());
// 3. Work through the queue
cancelMigrationWorkloads(pendingWork);
return true;
}
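/**
 * Moves the pool into its next maintenance state: straight to Maintenance when
 * no hosts are connected to it, otherwise to PrepareForMaintenance so the VMs
 * using the pool can be processed first.
 */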
private StoragePoolStatus setNextStateForMaintenance(List<HostVO> hosts, StoragePoolVO pool) {
if (CollectionUtils.isEmpty(hosts)) {
pool.setStatus(StoragePoolStatus.Maintenance);
primaryDataStoreDao.update(pool.getId(), pool);
return StoragePoolStatus.Maintenance;
} else {
// set the pool state to prepare for maintenance
pool.setStatus(StoragePoolStatus.PrepareForMaintenance);
primaryDataStoreDao.update(pool.getId(), pool);
return StoragePoolStatus.PrepareForMaintenance;
}
}
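/**
 * Works through the pending maintenance queue of the pool: system VMs (console
 * proxy, secondary storage VM, virtual router) are stopped and, when other
 * pools are available, started again; user VMs are only stopped.
 */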
private void processMigrationWorkloads(StoragePoolVO pool, boolean restart) throws ResourceUnavailableException, OperationTimedoutException, InsufficientCapacityException {
List<StoragePoolWorkVO> pendingWork = _storagePoolWorkDao.listPendingWorkForPrepareForMaintenanceByPoolId(pool.getId());
for (StoragePoolWorkVO work : pendingWork) {
// shut down the running vms
VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
if (vmInstance == null) {
continue;
}
switch (vmInstance.getType()) {
case ConsoleProxy:
case SecondaryStorageVm:
case DomainRouter:
handleVmMigration(restart, work, vmInstance);
break;
case User:
handleStopVmForMigration(work, vmInstance);
break;
}
}
}
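/**
 * Works through the pending work queue after maintenance has been cancelled,
 * restarting the VMs that were stopped; user VMs are only started if they
 * still have a ROOT volume.
 */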
private void cancelMigrationWorkloads(List<StoragePoolWorkVO> pendingWork) {
for (StoragePoolWorkVO work : pendingWork) {
try {
VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
if (vmInstance == null) {
continue;
}
switch (vmInstance.getType()) {
case ConsoleProxy:
case SecondaryStorageVm:
case DomainRouter:
handleVmStart(work, vmInstance);
break;
case User:
handleUserVmStart(work, vmInstance);
break;
}
} catch (Exception e) {
logger.debug("Failed start vm", e);
throw new CloudRuntimeException(e.toString());
}
}
}
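/**
 * Stops the VM and marks the work item as stopped for maintenance.
 */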
private void handleStopVmForMigration(StoragePoolWorkVO work, VMInstanceVO vmInstance) throws ResourceUnavailableException, OperationTimedoutException {
vmMgr.advanceStop(vmInstance.getUuid(), false);
// update work status
work.setStoppedForMaintenance(true);
_storagePoolWorkDao.update(work.getId(), work);
}
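/**
 * Stops the VM and, when a restart is requested (i.e. other pools are
 * available), starts it again.
 */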
private void handleVmMigration(boolean restart, StoragePoolWorkVO work, VMInstanceVO vmInstance) throws ResourceUnavailableException, OperationTimedoutException, InsufficientCapacityException {
handleStopVmForMigration(work, vmInstance);
if (restart) {
handleVmStart(work, vmInstance);
}
}
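/**
 * Starts the VM and marks the work item as started after maintenance.
 */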
private void handleVmStart(StoragePoolWorkVO work, VMInstanceVO vmInstance) throws InsufficientCapacityException, ResourceUnavailableException, OperationTimedoutException {
vmMgr.advanceStart(vmInstance.getUuid(), null, null);
// update work queue
work.setStartedAfterMaintenance(true);
_storagePoolWorkDao.update(work.getId(), work);
}
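/**
 * Creates (or resets) a work queue entry for every Running, Starting or
 * Stopping VM that owns a volume on the pool.
 */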
private void enqueueMigrationsForVolumes(List<VolumeVO> allVolumes, StoragePoolVO pool) {
for (VolumeVO volume : allVolumes) {
VMInstanceVO vmInstance = vmDao.findById(volume.getInstanceId());
if (vmInstance == null) {
continue;
}
// enqueue sp work
if (vmInstance.getState().equals(State.Running) || vmInstance.getState().equals(State.Starting) || vmInstance.getState().equals(State.Stopping)) {
try {
StoragePoolWorkVO work = new StoragePoolWorkVO(vmInstance.getId(), pool.getId(), false, false, server.getId());
_storagePoolWorkDao.persist(work);
} catch (Exception e) {
if (logger.isDebugEnabled()) {
logger.debug("Work record already exists, reusing by re-setting values");
}
StoragePoolWorkVO work = _storagePoolWorkDao.findByPoolIdAndVmId(pool.getId(), vmInstance.getId());
work.setStartedAfterMaintenance(false);
work.setStoppedForMaintenance(false);
work.setManagementServerId(server.getId());
_storagePoolWorkDao.update(work.getId(), work);
}
}
}
}
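/**
 * Sends a ModifyStoragePoolCommand with add=false to every host connected to
 * the pool so the hosts stop heartbeating to it.
 */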
private void removeHeartbeatForHostsFromPool(List<HostVO> hosts, StoragePool storagePool) {
// remove heartbeat
for (HostVO host : hosts) {
ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(false, storagePool);
final Answer answer = agentMgr.easySend(host.getId(), cmd);
if (answer == null || !answer.getResult()) {
if (logger.isDebugEnabled()) {
logger.debug("ModifyStoragePool false failed due to {}", ((answer == null) ? "answer null" : answer.getDetails()));
}
} else {
reportSucceededModifyStoragePool(storagePool, (ModifyStoragePoolAnswer) answer, host, false);
}
}
}
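/**
 * Logs the successful ModifyStoragePoolCommand and, for datastore cluster
 * pools, synchronises the cluster's child storage pools with vCenter.
 */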
private void reportSucceededModifyStoragePool(StoragePool storagePool, ModifyStoragePoolAnswer answer, HostVO host, boolean add) {
if (logger.isDebugEnabled()) {
logger.debug("ModifyStoragePool succeeded for {}", add ? "adding" : "removing");
}
if (storagePool.getPoolType() == Storage.StoragePoolType.DatastoreCluster) {
logger.debug("Started synchronising datastore cluster storage pool {} with vCenter", storagePool);
storageManager.syncDatastoreClusterStoragePool(storagePool.getId(), answer.getDatastoreClusterChildren(), host.getId());
}
}
/**
 * Handles both zone-wide and cluster-wide primary storage. If the storage scope
 * is ZONE, all up and enabled hosts in the zone (filtered by the pool's
 * hypervisor type unless it is Any) are returned, so that each of them can be
 * sent a ModifyStoragePoolCommand; for cluster-wide storage, only the Up hosts
 * of the pool's cluster are returned.
 * TODO: for zone-wide storage this may list a very large number of hosts, which can cause performance/OOM issues.
 * @param pool pool to check for connected hosts
 * @return a list of connected hosts
 */
private List<HostVO> getHostsForStoragePool(StoragePoolVO pool) {
List<HostVO> hosts;
if (pool.getScope().equals(ScopeType.ZONE)) {
if (HypervisorType.Any.equals(pool.getHypervisor())) {
hosts = _resourceMgr.listAllUpAndEnabledHostsInOneZone(pool.getDataCenterId());
} else {
hosts = _resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(pool.getHypervisor(), pool.getDataCenterId());
}
} else {
hosts = _resourceMgr.listHostsInClusterByStatus(pool.getClusterId(), Status.Up);
}
return hosts;
}
/**
 * Handles both zone-wide and cluster-wide storage scopes: depending on the
 * scope of the pool, looks up the other storage pools in the same scope. For
 * ZONE-wide storage, podId and clusterId are passed as null since they are
 * empty for zone-wide pools.
 *
 * @param pool pool to check for other pools in the same scope
 */
private void getStoragePoolForSpecification(StoragePoolVO pool) {
List<StoragePoolVO> storagePools;
if (pool.getScope() == ScopeType.ZONE) {
storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(), null, null, ScopeType.ZONE);
} else {
storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(), pool.getPodId(), pool.getClusterId(), ScopeType.CLUSTER);
}
checkHierarchyForPreparingForMaintenance(pool, storagePools);
}
/**
 * Only one storage pool in a cluster may be in PrepareForMaintenance mode at a
 * time. When a datastore cluster is prepared for maintenance its child storage
 * pools are also put into PrepareForMaintenance mode, so pools sharing the same
 * parent, and the parent itself, are exempt from this check.
 * @param pool target to put in maintenance
 * @param storagePools list of possible peers/parents/children
 */
private static void checkHierarchyForPreparingForMaintenance(StoragePoolVO pool, List<StoragePoolVO> storagePools) {
for (StoragePoolVO storagePool : storagePools) {
if (!(storagePool.getParent().equals(pool.getParent()) || pool.getParent().equals(storagePool.getId())) &&
(storagePool.getStatus() == StoragePoolStatus.PrepareForMaintenance)) {
throw new CloudRuntimeException(String.format("Only one storage pool in a cluster can be in PrepareForMaintenance mode, %s is already in PrepareForMaintenance mode ", storagePool));
}
}
}
/**
 * Checks whether the VM has a ROOT volume. If it does not, the item is removed
 * from the queue: a VM should only be started when it has at least one ROOT
 * volume attached, so starting a VM without one is not allowed.
 * @param work work item to handle for this VM
 * @param vmInstance VM to start
 * @throws InsufficientCapacityException no migration target found
 * @throws ResourceUnavailableException a resource required for migration is not in the expected state
 * @throws OperationTimedoutException migration operation took too long
 */
private void handleUserVmStart(StoragePoolWorkVO work, VMInstanceVO vmInstance) throws InsufficientCapacityException, ResourceUnavailableException, OperationTimedoutException {
if (volumeDao.findByInstanceAndType(vmInstance.getId(), Volume.Type.ROOT).isEmpty()) {
_storagePoolWorkDao.remove(work.getId());
} else {
handleVmStart(work, vmInstance);
}
}
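/**
 * Sends a ModifyStoragePoolCommand with add=true to every host connected to the
 * pool to restore the storage heartbeat; when NFS mount options are in use and
 * a host fails to attach the pool, a CloudRuntimeException is thrown.
 */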
private void addHeartbeatToHostsInPool(List<HostVO> hosts, StoragePool pool, Pair<Map<String, String>, Boolean> nfsMountOpts) {
for (HostVO host : hosts) {
ModifyStoragePoolCommand msPoolCmd = new ModifyStoragePoolCommand(true, pool, nfsMountOpts.first());
final Answer answer = agentMgr.easySend(host.getId(), msPoolCmd);
if (answer == null || !answer.getResult()) {
if (logger.isDebugEnabled()) {
logger.debug("ModifyStoragePool add failed due to {}", ((answer == null) ? "answer null" : answer.getDetails()));
}
if (answer != null && nfsMountOpts.second()) {
logger.error("Unable to attach storage pool to the host {} due to {}", host, answer.getDetails());
StringBuilder exceptionSB = new StringBuilder("Unable to attach storage pool to the host ").append(host.getName());
String reason = storageManager.getStoragePoolMountFailureReason(answer.getDetails());
if (reason != null) {
exceptionSB.append(". ").append(reason).append(".");
}
throw new CloudRuntimeException(exceptionSB.toString());
}
} else {
storageManager.updateStoragePoolHostVOAndBytes(pool, host.getId(), (ModifyStoragePoolAnswer) answer);
reportSucceededModifyStoragePool(pool, (ModifyStoragePoolAnswer) answer, host, true);
}
}
}
}